From d6ca424ea2545945e9ce38fdebff8210d3e70d98 Mon Sep 17 00:00:00 2001 From: Sears Russell Date: Sat, 27 May 2006 02:45:29 +0000 Subject: [PATCH] First stab at automated benchmark execution; broke compatibility with old plotter.pl, timing.pl, and added a Makefile that attempts to intelligently manage binaries, and benchmarking data, with support for interrupted runs, etc. Also, extended the .def format to include enough information to run the benchmarks --- benchmarks/nightly/Makefile | 81 ++++++++ benchmarks/nightly/defs/BULK_LOAD.def | 8 + benchmarks/nightly/defs/BULK_LOAD_RAW.def | 3 + benchmarks/nightly/defs/LLADD_HASH_TPS.def | 3 + benchmarks/nightly/defs/TPS.def | 3 + benchmarks/nightly/generateGraphData | 17 ++ benchmarks/nightly/plot | 222 +++++++++++++++++++++ benchmarks/nightly/timer | 202 +++++++++++++++++++ 8 files changed, 539 insertions(+) create mode 100644 benchmarks/nightly/Makefile create mode 100644 benchmarks/nightly/defs/BULK_LOAD.def create mode 100644 benchmarks/nightly/defs/BULK_LOAD_RAW.def create mode 100644 benchmarks/nightly/defs/LLADD_HASH_TPS.def create mode 100644 benchmarks/nightly/defs/TPS.def create mode 100755 benchmarks/nightly/generateGraphData create mode 100755 benchmarks/nightly/plot create mode 100755 benchmarks/nightly/timer diff --git a/benchmarks/nightly/Makefile b/benchmarks/nightly/Makefile new file mode 100644 index 0000000..6cb4d86 --- /dev/null +++ b/benchmarks/nightly/Makefile @@ -0,0 +1,81 @@ + +# This lets us keep the .def files in their own directory, but treat +# them as though they're in the current (per run) directory. 
+#VPATH=../defs/
+
+VPATH=../..:../../berkeleyDB
+
+GRAPHS=BULK_LOAD
+# BULK_LOAD_RAW LLADD_HASH_TPS TPS
+BINARIES=linearHashNTA logicalHash bdbHash
+
+all-binaries = $(patsubst %, %.exe, ${BINARIES})
+all-r-files = $(patsubst %, %.R, $(GRAPHS))
+all-defs = $(patsubst %, %.def, $(GRAPHS))
+# Sub-makes inherit MAKEFLAGS through the environment.
+MAKEFLAGS=--no-print-directory
+RFLAGS=--no-save --no-load
+
+all: all-graphs all-results ${all-binaries}
+.PHONY: all all-graphs all-results all-png-graphs all-pdf-graphs all-ps-graphs clean veryclean
+all-graphs : all-png-graphs all-pdf-graphs all-ps-graphs
+
+all-results : $(patsubst %, %.results, $(GRAPHS))
+all-png-graphs : $(patsubst %, %.png, $(GRAPHS))
+all-pdf-graphs : $(patsubst %, %.pdf, $(GRAPHS))
+all-ps-graphs : $(patsubst %, %.ps, $(GRAPHS))
+
+# Make the <graph>-<series>.dat file of every Data-Series line in the .def.
+# XXX "|| true" is a hack: grep -v fails if no lines are left; only status 2 is an error.
+%.results : %.def ${all-binaries}
+	$(MAKE) `grep Data-Series\: $< | \
+	  perl -ne 's/[\"\s]+/\t/g;print;print"\n"' | \
+	  cut -f2 | \
+	  perl -ne 'chomp;print"$*-";print;print".dat\n"'` \
+	  | grep -v 'is up to date.' || true
+
+%.png: %.results %.def
+	../plot --format png $* | R $(RFLAGS) > /dev/null
+%.pdf: %.results %.def
+	../plot --format pdf $* | R $(RFLAGS) > /dev/null
+%.ps: %.results %.def
+	../plot --format ps $* | R $(RFLAGS) > /dev/null
+
+%.def : defs/%.def
+	ln -s $< .
+
+%.dat : ${all-binaries} ${all-defs}
+	../generateGraphData $*
+	cp defs/$@ $@
+
+%.R : %.def
+	plotting.pl $* > $*.R
+
+clean :
+	rm -f *.def *.R *.png *.pdf *.ps
+veryclean : clean
+	rm -f *.exe *.dat *.results binaries
+
+binaries:
+	touch binaries
+# "binaries" is an empty file that's newer than the executables in
+# this directory. It is used as an empty target, and records the time
+# at which this run was initiated. (It is 'built' from the
+# executables by copying them into the current directory, and touching
+# 'binaries'.) This prevents new binaries from inadvertently being
+# copied into archived benchmark directories.
+
+# Note that this makefile recursively calls itself... there must be a
+# better way. The problem is that we need to prevent make from
+# realizing that we build the .exe from the original binary.
+# Otherwise, it would copy over the archived binary when we recompile...
+%.exe : binaries
+	$(MAKE) $*.exe.tmp
+	mv $*.exe.tmp $*.exe
+
+%.exe.tmp : %
+	cp $< $*.exe.tmp
+
+benchmarks : binaries $(patsubst %.exe, %.dat, $(wildcard *.exe))
+	touch benchmarks
+
diff --git a/benchmarks/nightly/defs/BULK_LOAD.def b/benchmarks/nightly/defs/BULK_LOAD.def
new file mode 100644
index 0000000..5ad5a12
--- /dev/null
+++ b/benchmarks/nightly/defs/BULK_LOAD.def
@@ -0,0 +1,8 @@
+Title: Bulk Load Time - Single Transaction
+Plot-Type: 2D Line
+X-Label: Insertions
+Y-Label: Seconds
+X-Range: qw(10 50 100 500 1000)
+Data-Series: "LINEAR_HASH_NTA" linearHashNTA 1 $x
+Data-Series: "Linear_Hash" logicalHash 1 $x
+Data-Series: "Berkeley_DB" bdbHash 1 $x
diff --git a/benchmarks/nightly/defs/BULK_LOAD_RAW.def b/benchmarks/nightly/defs/BULK_LOAD_RAW.def
new file mode 100644
index 0000000..0d424c0
--- /dev/null
+++ b/benchmarks/nightly/defs/BULK_LOAD_RAW.def
@@ -0,0 +1,3 @@
+Title: Bulk Load Time - Single Transaction
+X-Label: Insertions
+Y-Label: Seconds
diff --git a/benchmarks/nightly/defs/LLADD_HASH_TPS.def b/benchmarks/nightly/defs/LLADD_HASH_TPS.def
new file mode 100644
index 0000000..f623f9f
--- /dev/null
+++ b/benchmarks/nightly/defs/LLADD_HASH_TPS.def
@@ -0,0 +1,3 @@
+Title: Concurrent Transactions Per Second
+X-Label: Number of concurrent requests
+Y-Label: TPS
diff --git a/benchmarks/nightly/defs/TPS.def b/benchmarks/nightly/defs/TPS.def
new file mode 100644
index 0000000..f623f9f
--- /dev/null
+++ b/benchmarks/nightly/defs/TPS.def
@@ -0,0 +1,3 @@
+Title: Concurrent Transactions Per Second
+X-Label: Number of concurrent requests
+Y-Label: TPS
diff --git a/benchmarks/nightly/generateGraphData b/benchmarks/nightly/generateGraphData
new file mode 100755
index 
0000000..35a4019
--- /dev/null
+++ b/benchmarks/nightly/generateGraphData
@@ -0,0 +1,32 @@
+#!/usr/bin/perl -w
+
+# generateGraphData GRAPH_NAME-SERIES_NAME
+#
+# Looks up the Data-Series entry for SERIES_NAME and the X-Range entry in
+# GRAPH_NAME.def (in the current directory) and reports both on stderr.
+# Dies if the argument is malformed or either entry is missing.
+
+use strict;
+
+my $usage = "Usage $0 GRAPH_NAME-SERIES_NAME\n";
+
+my $dataSeries = shift @ARGV;
+defined($dataSeries) || die $usage;
+
+# Split on the first '-' only: everything after it is the series name.
+my @tok = split /\-/, $dataSeries, 2;
+
+(@tok == 2 && length($tok[1])) || die $usage;
+
+# Read the .def file directly instead of interpolating the caller-supplied
+# names into a shell "grep" pipeline.
+open(my $def, '<', "$tok[0].def") || die "Couldn't open $tok[0].def: $!\n";
+my @defLines = <$def>;
+close($def);
+
+my $seriesLine = join('', grep { /Data-Series:/ && /\Q$tok[1]\E/ } @defLines)
+  || die "No Data-Series entry\n";
+my $range = join('', grep { /X-Range:/ } @defLines)
+  || die "No X-Range entry\n";
+
+warn "Series line:\t$seriesLine\rRange:$range";
diff --git a/benchmarks/nightly/plot b/benchmarks/nightly/plot
new file mode 100755
index 0000000..0501773
--- /dev/null
+++ b/benchmarks/nightly/plot
@@ -0,0 +1,266 @@
+#! /usr/bin/perl -w
+
+# plot [--format (pdf|ps|png)] [--width in] [--height in] [--dpi n] GRAPH_NAME
+#
+# Writes an R script to stdout that plots every GRAPH_NAME-*.dat file in the
+# current directory as one series, taking the title and axis labels from
+# GRAPH_NAME.def.  Each .dat file is read as tab-separated columns with no
+# header row; column 1 is plotted as x and column 2 as y.
+
+use strict;
+
+# R command that opens the output device; filled in by set_format().
+my $R_PREAMBLE;
+
+my $outputFormat = "pdf";
+
+# Output size in inches; for png it is multiplied by $dpi to get pixels.
+my $width = 5;
+my $height = 4;
+my $dpi = 100;
+
+# set_format($format)
+#
+# Sets $R_PREAMBLE to the R call that opens a pdf, ps or png device, using
+# the current $width, $height and $dpi.  Dies on any other format name.
+sub set_format {
+  my $format = shift;
+  if($format eq "pdf") {
+    $R_PREAMBLE = qq{
+      pdf("__PLOT_NAME__.pdf", width = $width, height = $height,
+      horizontal = FALSE, onefile = FALSE, paper = "special")
+
+    };
+#      family = "ComputerModern")
+  } elsif ($format eq "ps") {
+    $R_PREAMBLE = qq{
+      postscript("__PLOT_NAME__.ps", width = $width, height = $height,
+      horizontal = FALSE, onefile = FALSE, paper = "special",
+      family = "ComputerModern")
+    };
+  } elsif ($format eq "png") {
+    my $pwidth = $width * $dpi;
+    my $pheight = $height * $dpi;
+    $R_PREAMBLE = qq{
+      png(filename = "__PLOT_NAME__.png", width = $pwidth, height = $pheight,
+      pointsize = 12, bg = "white", res = $dpi)
+    };
+  } else {
+    die ("Unknown format. Known formats are: pdf ps png\n");
+  }
+}
+
+# R templates.  The __NAME__-style placeholders are filled in by replaceAll().
+my $R_READ = qq{
+  __NAME__<-read.table("__FILE__",header=F,sep="\\t")
+  };
+
+my $R_FIRST_PLOT = qq{
+  plot(__NAME__\$V1, __NAME__\$V2, type="b",
+  xlim=c(0,max(__NAME_X_LIST__)),
+  ylim=c(0, max(__NAME_Y_LIST__)),
+  xlab="__XLAB__", ylab="__YLAB__",
+  main="__TITLE__", pch=1, col=1)
+  };
+
+my $R_NEXT_PLOT = qq{
+  lines(__NAME__\$V1, __NAME__\$V2, type="b", pch=__N__+1, col=__N__+1)
+  };
+
+my $R_LEGEND = qq{
+  legend((0 * max(__NAME_X_LIST__)), max(__NAME_Y_LIST__), legend=c(__NAME_LABEL_LIST__), bty="n", pch=c(1:__COUNT__), col=c(1:__COUNT__))
+};
+
+my $R_FINISH = qq{
+  dev.off()
+  };
+
+# replaceAll($template, \%values)
+#
+# Returns $template with every occurrence of each key of %values replaced by
+# its value.  Keys are matched literally and applied in sorted order, so the
+# result does not depend on hash ordering.
+sub replaceAll {
+  my $cmd = shift;
+  my $arglist_ref = shift;
+
+  foreach my $placeholder (sort keys %{$arglist_ref}) {
+    $cmd =~ s/\Q$placeholder\E/$arglist_ref->{$placeholder}/g;
+  }
+  return $cmd;
+}
+
+# getConfig($basename, $key)
+#
+# Returns the value of the first "$key: value" line of $basename.def, or ''
+# if there is none.  Warns and returns '' if the file cannot be opened.
+sub getConfig {
+  my $basename = shift;
+  my $key = shift;
+  my $value = '';
+
+  # Read the file directly rather than putting $key and $basename on a
+  # shell "grep" command line.
+  if (open(my $def, '<', "$basename.def")) {
+    while (my $line = <$def>) {
+      if ($line =~ /^\Q$key\E:/) {
+        $value = $line;
+        last;
+      }
+    }
+    close($def);
+  } else {
+    warn "Couldn't open $basename.def: $!\n";
+  }
+
+  $value =~ s/^\Q$key\E\:\s+//;
+  chomp $value;
+  return $value;
+}
+
+# filesToLabels(\@files)
+#
+# Turns data file names such as "GRAPH-Some_Series.dat" into one string of
+# quoted, comma separated legend labels such as "Some Series".  The part up
+# to the first '-' and the ".dat" suffix are dropped, and every remaining
+# '-' or '_' becomes a space.  The caller's array is not modified.
+sub filesToLabels {
+  my $array_ref = shift;
+
+  my @array = @{$array_ref};
+
+  foreach my $label (@array) {
+    chomp $label;
+    $label =~ s/^[^\-]+\-//;
+    $label =~ s/\.dat$//;
+    $label =~ s/[-_]/ /g;
+  }
+
+  return '"' . join ('", "', @array). '"';
+}
+
+# labelsToVars($labels)
+#
+# Turns the label list from filesToLabels() into R variable names by
+# deleting every character that is not a letter, digit, '"' or ','.
+sub labelsToVars {
+  my $i = shift;
+
+  $i =~ s/[^A-Za-z0-9\"\,]//g;
+
+  $i =~ s/\,/, /g;
+
+  return $i;
+}
+
+# varsToColumnList($vars, $column)
+#
+# Appends '$' and $column to every quoted name in $vars and removes the
+# quotes: ('"a", "b"', 'V1') gives 'a$V1, b$V1'.
+sub varsToColumnList {
+  my $i = shift;
+  my $column = shift;
+
+  $i =~ s/\"\,/\$$column\",/g;
+  $i =~ s/\"$/\$$column\"/;
+  $i =~ s/\"//g;
+
+  return $i;
+}
+
+# The x (column V1) and y (column V2) vectors of every series.
+sub varsToXList {
+  return varsToColumnList(shift(@_), 'V1');
+}
+sub varsToYList {
+  return varsToColumnList(shift(@_), 'V2');
+}
+
+my %vals;
+
+my $usage = "Usage: $0 [--format (pdf|ps|png)] [--width in] [--height in] [--dpi n] GRAPH_NAME\n";
+
+my $basename;
+while (@ARGV) {
+  if($ARGV[0] eq '--format') {
+    shift @ARGV;
+    $outputFormat = shift @ARGV;
+  } elsif ($ARGV[0] eq '--width') {
+    shift @ARGV;
+    $width = shift @ARGV;
+  } elsif ($ARGV[0] eq '--height') {
+    shift @ARGV;
+    $height = shift @ARGV;
+  } elsif ($ARGV[0] eq '--dpi') {
+    shift @ARGV;
+    $dpi = shift @ARGV;
+  } else {
+    defined($basename)
+      && die ("Multiple graphs specified. Invalid parameter?\n$usage");
+    $basename = shift(@ARGV);
+  }
+}
+
+defined($basename) || die $usage;
+
+# An option given as the last argument has no value to go with it.
+(defined($outputFormat) && defined($width) && defined($height) && defined($dpi))
+  || die $usage;
+
+set_format($outputFormat);
+
+# One data file per series, in the order "ls" lists them.
+my @files = `ls $basename-*.dat`;
+chomp(@files);
+
+$vals{__PLOT_NAME__} = $basename;
+$vals{__XLAB__} = getConfig($basename, "X-Label");
+$vals{__YLAB__} = getConfig($basename, "Y-Label");
+$vals{__TITLE__} = getConfig($basename, "Title");
+$vals{__NAME_LABEL_LIST__} = filesToLabels(\@files);
+
+my $vars = labelsToVars($vals{__NAME_LABEL_LIST__});
+
+$vals{__NAME_X_LIST__} = varsToXList($vars);
+$vals{__NAME_Y_LIST__} = varsToYList($vars);
+$vals{__COUNT__} = @files;
+
+# R variable names, one per data file and in the same order as @files.
+my @names = split /[\"\,\s]+/, $vars;
+
+# $vars begins with a quote, so split returns an empty first field.
+if($names[0] =~ /^\s*$/) {
+  shift @names;
+}
+
+$vals{__FILE__} = $files[0];
+$vals{__NAME__} = $names[0];
+$vals{__N__} = 0;
+
+print replaceAll($R_PREAMBLE, \%vals);
+
+# Load every data file into its own R variable.
+print replaceAll($R_READ, \%vals);
+
+for(my $n = 1 ; $n < @names; $n++) {
+  $vals{__FILE__} = $files[$n];
+  $vals{__NAME__} = $names[$n];
+  print replaceAll($R_READ, \%vals);
+}
+
+# The first series creates the plot; the rest are drawn onto it.
+$vals{__FILE__} = $files[0];
+$vals{__NAME__} = $names[0];
+
+print replaceAll($R_FIRST_PLOT, \%vals);
+
+for(my $n = 1 ; $n < @names; $n++) {
+  $vals{__FILE__} = $files[$n];
+  $vals{__NAME__} = $names[$n];
+  $vals{__N__} = $n;
+
+  print replaceAll($R_NEXT_PLOT, \%vals);
+}
+
+print replaceAll($R_LEGEND, \%vals);
+print replaceAll($R_FINISH, \%vals);
diff --git a/benchmarks/nightly/timer b/benchmarks/nightly/timer
new file mode 100755
index 0000000..471be03
--- /dev/null
+++ b/benchmarks/nightly/timer
@@ -0,0 +1,252 @@
+#! /usr/bin/perl -w
+
+# timer [-return]
+#
+# Reads one shell command per line from standard input and benchmarks each
+# of them: the command is run repeatedly until the confidence interval of
+# its mean run time is narrower than +/- $plusMinus of the mean, or until
+# $maxRuns runs have been made.  A summary line with the mean is printed.
+#
+# By default each run is timed from outside, around system().  With
+# -return, whatever the command prints on stdout is taken as its run time.
+
+use strict;
+
+# Column of the t table (below) that holds each confidence level.
+my $CI80 = 1;
+my $CI90 = 2;
+my $CI95 = 3;
+my $CI98 = 4;
+my $CI99 = 5;
+my $CI99_8 = 6;
+my $CI99_9 = 7;
+
+# Student's t table.  Each row is: degrees of freedom, one critical value
+# per confidence level above, and the degrees of freedom again.
+my $tdistribution =
+qq(1 3.078 6.314 12.71 31.82 63.66 318.3 637 1
+2 1.886 2.920 4.303 6.965 9.925 22.330 31.6 2
+3 1.638 2.353 3.182 4.541 5.841 10.210 12.92 3
+4 1.533 2.132 2.776 3.747 4.604 7.173 8.610 4
+5 1.476 2.015 2.571 3.365 4.032 5.893 6.869 5
+6 1.440 1.943 2.447 3.143 3.707 5.208 5.959 6
+7 1.415 1.895 2.365 2.998 3.499 4.785 5.408 7
+8 1.397 1.860 2.306 2.896 3.355 4.501 5.041 8
+9 1.383 1.833 2.262 2.821 3.250 4.297 4.781 9
+10 1.372 1.812 2.228 2.764 3.169 4.144 4.587 10
+11 1.363 1.796 2.201 2.718 3.106 4.025 4.437 11
+12 1.356 1.782 2.179 2.681 3.055 3.930 4.318 12
+13 1.350 1.771 2.160 2.650 3.012 3.852 4.221 13
+14 1.345 1.761 2.145 2.624 2.977 3.787 4.140 14
+15 1.341 1.753 2.131 2.602 2.947 3.733 4.073 15
+16 1.337 1.746 2.120 2.583 2.921 3.686 4.015 16
+17 1.333 1.740 2.110 2.567 2.898 3.646 3.965 17
+18 1.330 1.734 2.101 2.552 2.878 3.610 3.922 18
+19 1.328 1.729 2.093 2.539 2.861 3.579 3.883 19
+20 1.325 1.725 2.086 2.528 2.845 3.552 3.850 20
+21 1.323 1.721 2.080 2.518 2.831 3.527 3.819 21
+22 1.321 1.717 2.074 2.508 2.819 3.505 3.792 22
+23 1.319 1.714 2.069 2.500 2.807 3.485 3.768 23
+24 1.318 1.711 2.064 2.492 2.797 3.467 3.745 24
+25 1.316 1.708 2.060 2.485 2.787 3.450 3.725 25
+26 1.315 1.706 2.056 2.479 2.779 3.435 3.707 26
+27 1.314 1.703 2.052 2.473 2.771 3.421 3.690 27
+28 1.313 1.701 2.048 2.467 2.763 3.408 3.674 28
+29 1.311 1.699 2.045 2.462 2.756 3.396 3.659 29
+30 1.310 1.697 2.042 2.457 2.750 3.385 3.646 30
+32 1.309 1.694 2.037 2.449 2.738 3.365 3.622 32
+34 1.307 1.691 2.032 2.441 2.728 3.348 3.601 34
+36 1.306 1.688 2.028 2.434 2.719 3.333 3.582 36
+38 1.304 1.686 2.024 2.429 2.712 3.319 3.566 38
+40 1.303 1.684 2.021 2.423 2.704 3.307 3.551 40
+42 1.302 1.682 2.018 2.418 2.698 3.296 3.538 42
+44 1.301 1.680 2.015 2.414 2.692 3.286 3.526 44
+46 1.300 1.679 2.013 2.410 2.687 3.277 3.515 46
+48 1.299 1.677 2.011 2.407 2.682 3.269 3.505 48
+50 1.299 1.676 2.009 2.403 2.678 3.261 3.496 50
+55 1.297 1.673 2.004 2.396 2.668 3.245 3.476 55
+60 1.296 1.671 2.000 2.390 2.660 3.232 3.460 60
+65 1.295 1.669 1.997 2.385 2.654 3.220 3.447 65
+70 1.294 1.667 1.994 2.381 2.648 3.211 3.435 70
+80 1.292 1.664 1.990 2.374 2.639 3.195 3.416 80
+100 1.290 1.660 1.984 2.364 2.626 3.174 3.390 100
+150 1.287 1.655 1.976 2.351 2.609 3.145 3.357 150
+200 1.286 1.653 1.972 2.345 2.601 3.131 3.340 200
+);
+
+### How tight should the CI be?
+my $myCI = $CI95;
+
+my $minRuns = 6;           # always make at least this many runs
+my $maxRuns = 10;          # never make more than this many runs
+my $plusMinus = 0.05;      # target CI half-width, as a fraction of the mean
+my $use_gettimeofday = 1;  # cleared by -return
+
+# $ttbl[$df - 1] is the table row to use for $df degrees of freedom.
+my @ttbl;
+
+# parse_t_distribution()
+#
+# Fills @ttbl from $tdistribution.  Degrees of freedom that have no row of
+# their own (31, 33, ...) use the next larger row in the table.
+sub parse_t_distribution {
+  my $i = 0;
+  my @lines = split /\n/, $tdistribution;
+  foreach my $line (@lines) {
+    my @tok = split /\s+/, $line;
+    pop @tok; ## Remove trailing n.
+
+    while($i < $tok[0]) {
+      push @ttbl, \@tok;
+      $i++;
+    }
+  }
+}
+
+# t_value($df)
+#
+# Returns the critical t value at confidence level $myCI for $df degrees of
+# freedom, clamping $df to the range that @ttbl covers.
+sub t_value {
+  my $df = shift;
+
+  $df = 1 if $df < 1;
+  $df = scalar(@ttbl) if $df > scalar(@ttbl);
+  return $ttbl[$df - 1][$myCI];
+}
+
+# reset_state()
+#
+# Deletes the files and directories named below (left over from an earlier
+# run), then syncs and sleeps for a second.
+sub reset_state {
+  `rm -rf storefile.txt logfile.txt blob0_file.txt blob1_file.txt TXNAPP bdb; sync; sleep 1`;
+}
+
+# runit_gettimeofday($cmd)
+#
+# Runs $cmd through system() and returns the difference between the output
+# of the external "getTimeOfDay" command after and before the run, divided
+# by 1000.
+# NOTE(review): presumably getTimeOfDay prints milliseconds, which would
+# make the result seconds -- confirm against that helper.
+sub runit_gettimeofday {
+  my $cmd = shift;
+
+  reset_state();
+
+  my $start_sec = `getTimeOfDay`;
+
+  system($cmd);
+
+  my $end_sec = `getTimeOfDay`;
+
+  chomp($start_sec);
+  chomp($end_sec);
+
+  return ($end_sec - $start_sec) / 1000.0;
+}
+
+# runit_returntime($cmd)
+#
+# Runs $cmd and returns what it printed on stdout, which is taken to be its
+# run time.
+sub runit_returntime {
+  my $cmd = shift;
+
+  reset_state();
+
+  my $time = `$cmd`;
+  chomp($time);   # keep the newline out of the per-run log line
+
+  return $time;
+}
+
+# runbatch($cmd)
+#
+# Runs $cmd at least $minRuns and at most $maxRuns times, stopping early
+# once the half-width of the confidence interval of the mean run time is
+# below $plusMinus * mean.  Prints one line per run, then "CI mean was"
+# with the mean, or "CI FAILED." if the interval is still too wide.
+sub runbatch {
+  my $cmd = shift;
+
+  my $sum_x = 0;
+  my $sum_x_squared = 0;
+  my $n = 0;
+  my $mean = 0;
+
+  my $max_range = 0;
+  my $cur_range = 1;
+  while( 1 ) {
+    if ($n >= $minRuns) {
+      if($n >= $maxRuns) {
+        last;
+      }
+      if($cur_range < $max_range) {
+        last;
+      }
+    }
+
+    my $x;
+    if ($use_gettimeofday) {
+      $x = runit_gettimeofday($cmd);
+    } else {
+      $x = runit_returntime($cmd);
+    }
+
+    $n++;
+    $sum_x += $x;
+    $sum_x_squared += ($x * $x);
+
+    $mean = $sum_x / $n;
+
+    # Unbiased sample variance; it needs at least two samples.
+    my $variance = 0;
+    if ($n > 1) {
+      $variance = ($sum_x_squared - $n * $mean * $mean) / ($n - 1);
+    }
+
+    my $s;      # standard error of the mean
+    my $sigma;  # sample standard deviation
+    if($variance <= 0) {
+      $s = 0;
+      $sigma = 0;
+    } else {
+      $s = sqrt($variance/$n);
+      $sigma = sqrt($variance);
+    }
+
+    print("Time: $x s: $s Mean: $mean Stdev: $sigma\n");
+
+    # CI half-width: t value for n - 1 degrees of freedom times the
+    # standard error.
+    $cur_range = $s * t_value($n - 1);
+    $max_range = $plusMinus * $mean;
+  }
+
+  # print rather than printf: $cmd is data and may contain '%'.
+  if($cur_range > $max_range) {
+    print("CI FAILED. mean was: $mean\t$cmd\n");
+  } else {
+    print("CI mean was: $mean\t$cmd\n");
+  }
+}
+
+while (@ARGV) {
+  if ($ARGV[0] eq "-return") {
+    $use_gettimeofday = 0;
+    shift @ARGV;
+  }
+  else {
+    die "unknown argument";
+  }
+}
+
+parse_t_distribution();
+
+# One command per input line; blank lines are skipped.
+while (my $cmd = <>) {
+  next unless $cmd =~ /\S/;
+  runbatch($cmd);
+}