From 786fb2e904283c5356382f7eb98e595eae2df3f4 Mon Sep 17 00:00:00 2001
From: Greg Burd <greg@burd.me>
Date: Fri, 14 Aug 2020 21:45:38 -0400
Subject: [PATCH] Initial import from Archive.org Wayback Machine. (wget
 -mkEpnp
 http://web.archive.org/web/20130420020035id_/http://www2.research.att.com/~gsf/testregex/
 and  wget -mkEpnp
 http://web.archive.org/web/20130420020018id_/http://www2.research.att.com/~gsf/man/man1/testregex.html)

---
 basic.dat               |  216 ++++
 categorize.dat          |   62 ++
 forcedassoc.dat         |   30 +
 leftassoc.dat           |   16 +
 man/man1/testregex.html |  142 +++
 nullsubexpr.dat         |   73 ++
 re-assoc.html           |   64 ++
 re-categorize.html      |  209 ++++
 re-interpretation.html  |  997 ++++++++++++++++++
 re-nullsubexpr.html     |   62 ++
 re-repetition.html      |   60 ++
 repetition.dat          |   79 ++
 rightassoc.dat          |   16 +
 testregex.c             | 2121 +++++++++++++++++++++++++++++++++++++++
 testregex.html          |  241 +++++
 15 files changed, 4388 insertions(+)
 create mode 100644 basic.dat
 create mode 100644 categorize.dat
 create mode 100644 forcedassoc.dat
 create mode 100644 leftassoc.dat
 create mode 100644 man/man1/testregex.html
 create mode 100644 nullsubexpr.dat
 create mode 100644 re-assoc.html
 create mode 100644 re-categorize.html
 create mode 100644 re-interpretation.html
 create mode 100644 re-nullsubexpr.html
 create mode 100644 re-repetition.html
 create mode 100644 repetition.dat
 create mode 100644 rightassoc.dat
 create mode 100644 testregex.c
 create mode 100644 testregex.html
diff --git a/basic.dat b/basic.dat
new file mode 100644
index 0000000..5c50f37
--- /dev/null
+++ b/basic.dat
@@ -0,0 +1,216 @@
+NOTE	all standard compliant implementations should pass these : 2002-05-31
+
+BE	abracadabra$	abracadabracadabra	(7,18)
+BE	a...b		abababbb		(2,7)
+BE	XXXXXX		..XXXXXX		(2,8)
+E	\)		()	(1,2)
+BE	a]		a]a	(0,2)
+B	}		}	(0,1)
+E	\}		}	(0,1)
+BE	\]		]	(0,1)
+B	]		]	(0,1)
+E	]		]	(0,1)
+B	{		{	(0,1)
+B	}		}	(0,1)
+BE	^a		ax	(0,1)
+BE	\^a		a^a	(1,3)
+BE	a\^		a^	(0,2)
+BE	a$		aa	(1,2)
+BE	a\$		a$	(0,2)
+BE	^$		NULL	(0,0)
+E	$^		NULL	(0,0)
+E	a($)		aa	(1,2)(2,2)
+E	a*(^a)		aa	(0,1)(0,1)
+E	(..)*(...)*		a	(0,0)
+E	(..)*(...)*		abcd	(0,4)(2,4)
+E	(ab|a)(bc|c)		abc	(0,3)(0,2)(2,3)
+E	(ab)c|abc		abc	(0,3)(0,2)
+E	a{0}b		ab			(1,2)
+E	(a*)(b?)(b+)b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	(a*)(b{0,1})(b{1,})b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	a{9876543210}	NULL	BADBR
+E	((a|a)|a)			a	(0,1)(0,1)(0,1)
+E	(a*)(a|aa)			aaaa	(0,4)(0,3)(3,4)
+E	a*(a.|aa)			aaaa	(0,4)(2,4)
+E	a(b)|c(d)|a(e)f			aef	(0,3)(?,?)(?,?)(1,2)
+E	(a|b)?.*			b	(0,1)(0,1)
+E	(a|b)c|a(b|c)			ac	(0,2)(0,1)
+E	(a|b)c|a(b|c)			ab	(0,2)(?,?)(1,2)
+E	(a|b)*c|(a|ab)*c		abc	(0,3)(1,2)
+E	(a|b)*c|(a|ab)*c		xc	(1,2)
+E	(.a|.b).*|.*(.a|.b)		xa	(0,2)(0,2)
+E	a?(ab|ba)ab			abab	(0,4)(0,2)
+E	a?(ac{0}b|ba)ab			abab	(0,4)(0,2)
+E	ab|abab				abbabab	(0,2)
+E	aba|bab|bba			baaabbbaba	(5,8)
+E	aba|bab				baaabbbaba	(6,9)
+E	(aa|aaa)*|(a|aaaaa)		aa	(0,2)(0,2)
+E	(a.|.a.)*|(a|.a...)		aa	(0,2)(0,2)
+E	ab|a				xabc	(1,3)
+E	ab|a				xxabc	(2,4)
+Ei	(Ab|cD)*			aBcD	(0,4)(2,4)
+BE	[^-]			--a		(2,3)
+BE	[a-]*			--a		(0,3)
+BE	[a-m-]*			--amoma--	(0,4)
+E	:::1:::0:|:::1:1:0:	:::0:::1:::1:::0:	(8,17)
+E	:::1:::0:|:::1:1:1:	:::0:::1:::1:::0:	(8,17)
+{E	[[:upper:]]		A		(0,1)	[[<element>]] not supported
+E	[[:lower:]]+		`az{		(1,3)
+E	[[:upper:]]+		@AZ[		(1,3)
+BE	[[-]]			[[-]]		(2,4)
+BE	[[.NIL.]]	NULL	ECOLLATE
+BE	[[=aleph=]]	NULL	ECOLLATE
+}
+BE$	\n		\n	(0,1)
+BEn$	\n		\n	(0,1)
+BE$	[^a]		\n	(0,1)
+BE$	\na		\na	(0,2)
+E	(a)(b)(c)	abc	(0,3)(0,1)(1,2)(2,3)
+BE	xxx		xxx	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 6,	(0,6)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	2/7	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 1,Feb 6	(5,11)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))	x	(0,1)(0,1)(0,1)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*	xx	(0,2)(1,2)(1,2)
+E	a?(ab|ba)*	ababababababababababababababababababababababababababababababababababababababababa	(0,81)(79,81)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabbbbaa	(18,25)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabaa	(18,22)
+E	aaac|aabc|abac|abbc|baac|babc|bbac|bbbc	baaabbbabac	(7,11)
+BE$	.*			\x01\xff	(0,2)
+E	aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	(53,57)
+L	aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	NOMATCH
+E	a*a*a*a*a*b		aaaaaaaaab	(0,10)
+BE	^			NULL		(0,0)
+BE	$			NULL		(0,0)
+BE	^$			NULL		(0,0)
+BE	^a$			a		(0,1)
+BE	abc			abc		(0,3)
+BE	abc			xabcy		(1,4)
+BE	abc			ababc		(2,5)
+BE	ab*c			abc		(0,3)
+BE	ab*bc			abc		(0,3)
+BE	ab*bc			abbc		(0,4)
+BE	ab*bc			abbbbc		(0,6)
+E	ab+bc			abbc		(0,4)
+E	ab+bc			abbbbc		(0,6)
+E	ab?bc			abbc		(0,4)
+E	ab?bc			abc		(0,3)
+E	ab?c			abc		(0,3)
+BE	^abc$			abc		(0,3)
+BE	^abc			abcc		(0,3)
+BE	abc$			aabc		(1,4)
+BE	^			abc		(0,0)
+BE	$			abc		(3,3)
+BE	a.c			abc		(0,3)
+BE	a.c			axc		(0,3)
+BE	a.*c			axyzc		(0,5)
+BE	a[bc]d			abd		(0,3)
+BE	a[b-d]e			ace		(0,3)
+BE	a[b-d]			aac		(1,3)
+BE	a[-b]			a-		(0,2)
+BE	a[b-]			a-		(0,2)
+BE	a]			a]		(0,2)
+BE	a[]]b			a]b		(0,3)
+BE	a[^bc]d			aed		(0,3)
+BE	a[^-b]c			adc		(0,3)
+BE	a[^]b]c			adc		(0,3)
+E	ab|cd			abc		(0,2)
+E	ab|cd			abcd		(0,2)
+E	a\(b			a(b		(0,3)
+E	a\(*b			ab		(0,2)
+E	a\(*b			a((b		(0,4)
+E	((a))			abc		(0,1)(0,1)(0,1)
+E	(a)b(c)			abc		(0,3)(0,1)(2,3)
+E	a+b+c			aabbabc		(4,7)
+E	a*			aaa		(0,3)
+E	(a*)*			-		(0,0)(0,0)
+E	(a*)+			-		(0,0)(0,0)
+E	(a*|b)*			-		(0,0)(0,0)
+E	(a+|b)*			ab		(0,2)(1,2)
+E	(a+|b)+			ab		(0,2)(1,2)
+E	(a+|b)?			ab		(0,1)(0,1)
+BE	[^ab]*			cde		(0,3)
+E	(^)*			-		(0,0)(0,0)
+BE	a*			NULL		(0,0)
+E	([abc])*d		abbbcd		(0,6)(4,5)
+E	([abc])*bcd		abcd		(0,4)(0,1)
+E	a|b|c|d|e		e		(0,1)
+E	(a|b|c|d|e)f		ef		(0,2)(0,1)
+E	((a*|b))*		-		(0,0)(0,0)(0,0)
+BE	abcd*efg		abcdefg		(0,7)
+BE	ab*			xabyabbbz	(1,3)
+BE	ab*			xayabbbz	(1,2)
+E	(ab|cd)e		abcde		(2,5)(2,4)
+BE	[abhgefdc]ij		hij		(0,3)
+E	(a|b)c*d		abcd		(1,4)(1,2)
+E	(ab|ab*)bc		abc		(0,3)(0,1)
+E	a([bc]*)c*		abc		(0,3)(1,3)
+E	a([bc]*)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]+)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]*)(c+d)		abcd		(0,4)(1,2)(2,4)
+E	a[bcd]*dcdcde		adcdcde		(0,7)
+E	(ab|a)b*c		abc		(0,3)(0,2)
+E	((a)(b)c)(d)		abcd		(0,4)(0,3)(0,1)(1,2)(3,4)
+BE	[A-Za-z_][A-Za-z0-9_]*	alpha		(0,5)
+E	^a(bc+|b[eh])g|.h$	abh		(1,3)
+E	(bc+d$|ef*g.|h?i(j|k))	effgz		(0,5)(0,5)
+E	(bc+d$|ef*g.|h?i(j|k))	ij		(0,2)(0,2)(1,2)
+E	(bc+d$|ef*g.|h?i(j|k))	reffgz		(1,6)(1,6)
+E	(((((((((a)))))))))	a		(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
+BE	multiple words		multiple words yeah	(0,14)
+E	(.*)c(.*)		abcde		(0,5)(0,2)(3,5)
+BE	abcd			abcd		(0,4)
+E	a(bc)d			abcd		(0,4)(1,3)
+E	a[-]?c		ac		(0,2)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mo'ammar Gadhafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Kaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qadhafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gadafi	(0,14)(?,?)(10,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moamar Gaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadhdhafi	(0,18)(?,?)(13,15)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Khaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafy	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muamar Kaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Quathafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gheddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Khadafy	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Qudhafi	(0,15)(?,?)(10,12)
+E	a+(b|c)*d+		aabcdd			(0,6)(3,4)
+E	^.+$			vivi			(0,4)
+E	^(.+)$			vivi			(0,4)(0,4)
+E	^([^!.]+).att.com!(.+)$	gryphon.att.com!eby	(0,19)(0,7)(16,19)
+E	^([^!]+!)?([^!]+)$	bas			(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$	bar!bas			(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$	foo!bas			(0,7)(0,4)(4,7)
+E	^.+!([^!]+!)([^!]+)$	foo!bar!bas		(0,11)(4,8)(8,11)
+E	((foo)|(bar))!bas	bar!bas			(0,7)(0,3)(?,?)(0,3)
+E	((foo)|(bar))!bas	foo!bar!bas		(4,11)(4,7)(?,?)(4,7)
+E	((foo)|(bar))!bas	foo!bas			(0,7)(0,3)(0,3)
+E	((foo)|bar)!bas		bar!bas			(0,7)(0,3)
+E	((foo)|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	((foo)|bar)!bas		foo!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		bar!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		foo!bar!bas		(4,11)(4,7)(4,7)
+E	(foo|(bar))!bas		foo!bas			(0,7)(0,3)
+E	(foo|bar)!bas		bar!bas			(0,7)(0,3)
+E	(foo|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	(foo|bar)!bas		foo!bas			(0,7)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bas		(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bar!bas		(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bar!bas	(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bas		(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bas		(0,3)(0,3)(?,?)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bar!bas		(0,7)(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bas		(0,7)(0,7)(0,4)(4,7)
+E	.*(/XXX).*			/XXX			(0,4)(0,4)
+E	.*(\\XXX).*			\XXX			(0,4)(0,4)
+E	\\XXX				\XXX			(0,4)
+E	.*(/000).*			/000			(0,4)(0,4)
+E	.*(\\000).*			\000			(0,4)(0,4)
+E	\\000				\000			(0,4)
diff --git a/categorize.dat b/categorize.dat
new file mode 100644
index 0000000..d348512
--- /dev/null
+++ b/categorize.dat
@@ -0,0 +1,62 @@
+NOTE	regex implementation categorization 2004-05-31
+
+?E	aa*			xaxaax	(1,2)					POSITION=leftmost
+;										POSITION=bug
+
+?E	(a*)(ab)*(b*)		abc	(0,2)(0,1)(?,?)(1,2)			ASSOCIATIVITY=right
+|E	(a*)(ab)*(b*)		abc	(0,2)(0,0)(0,2)(2,2)			ASSOCIATIVITY=left
+;										ASSOCIATIVITY=bug
+
+?E	((a*)(ab)*)((b*)(a*))	aba	(0,3)(0,2)(0,0)(0,2)(2,3)(2,2)(2,3)	SUBEXPRESSION=precedence
+|E	((a*)(ab)*)((b*)(a*))	aba	(0,3)(0,1)(0,1)(?,?)(1,3)(1,2)(2,3)	SUBEXPRESSION=grouping
+;										SUBEXPRESSION=bug
+
+?E	(...?.?)*		xxxxxx	(0,6)(4,6)				REPEAT_LONGEST=first
+|E	(...?.?)*		xxxxxx	(0,6)(2,6)				REPEAT_LONGEST=last
+|E	(...?.?)*		xxxxxx	OK					REPEAT_LONGEST=unknown
+;										REPEAT_LONGEST=bug
+
+?E	(a|ab)(bc|c)		abcabc	(0,3)(0,2)(2,3)				EXPECTED
+|E	(a|ab)(bc|c)		abcabc	(0,3)(0,1)(1,3)				BUG=alternation-order
+;										BUG=alternation-order-UNKNOWN
+
+?E	(aba|a*b)(aba|a*b)	ababa	(0,5)(0,2)(2,5)				EXPECTED
+|E	(aba|a*b)(aba|a*b)	ababa	(0,4)(0,3)(3,4)				BUG=first-match
+;										BUG=unknown-match
+
+?B	a\(b\)*\1		a	NOMATCH					EXPECTED
+|B	a\(b\)*\1		a	(0,1)					BUG=nomatch-match
+|B	a\(b\)*\1		abab	(0,2)(1,2)				# BUG=repeat-any
+;										BUG=nomatch-match-UNKNOWN
+
+?E	(a*){2}			xxxxx	(0,0)(0,0)				EXPECTED
+|E	(a*){2}			xxxxx	(5,5)(5,5)				BUG=range-null
+;										BUG=range-null-UNKNOWN
+
+?B	a\(b\)*\1		abab	NOMATCH					EXPECTED
+|B	a\(b\)*\1		abab	(0,1)					# BUG=nomatch-match
+|B	a\(b\)*\1		abab	(0,2)(1,2)				BUG=repeat-any
+;										BUG=repeat-any-UNKNOWN
+
+?E	(a*)*			a	(0,1)(0,1)				EXPECTED
+|E	(a*)*			ax	(0,1)(0,1)				BUG=repeat-null-unknown
+|E	(a*)*			a	(0,1)(1,1)				BUG=repeat-null
+;										BUG=repeat-null-UNKNOWN
+
+?E	(aba|a*b)*		ababa	(0,5)(2,5)				EXPECTED
+|E	(aba|a*b)*		ababa	(0,5)(3,4)				BUG=repeat-short
+|E	(aba|a*b)*		ababa	(0,4)(3,4)				# LENGTH=first
+;										BUG=repeat-short-UNKNOWN
+
+?E	(a(b)?)+		aba	(0,3)(2,3)				EXPECTED
+|E	(a(b)?)+		aba	(0,3)(2,3)(1,2)				BUG=repeat-artifact
+;										BUG=repeat-artifact-UNKNOWN
+
+?B	\(a\(b\)*\)*\2		abab	NOMATCH					EXPECTED
+|B	\(a\(b\)*\)*\2		abab	(0,4)(2,3)(1,2)				BUG=repeat-artifact-nomatch
+;										BUG=repeat-artifact-nomatch-UNKNOWN
+
+?E	(a?)((ab)?)(b?)a?(ab)?b?	abab	(0,4)(0,1)(1,1)(?,?)(1,2)(2,4)	BUG=subexpression-first
+|E	.*(.*)				ab	(0,2)(2,2)			EXPECTED
+|E	.*(.*)				ab	(0,2)(0,2)			BUG=subexpression-first
+;										BUG=subexpression-first-UNKNOWN
diff --git a/forcedassoc.dat b/forcedassoc.dat
new file mode 100644
index 0000000..39f3111
--- /dev/null
+++ b/forcedassoc.dat
@@ -0,0 +1,30 @@
+NOTE	left-assoc:pass-all right-assoc:pass-all : 2002-04-29
+
+E	(a|ab)(c|bcd)		abcd	(0,4)(0,1)(1,4)
+E	(a|ab)(bcd|c)		abcd	(0,4)(0,1)(1,4)
+E	(ab|a)(c|bcd)		abcd	(0,4)(0,1)(1,4)
+E	(ab|a)(bcd|c)		abcd	(0,4)(0,1)(1,4)
+E	((a|ab)(c|bcd))(d*)	abcd	(0,4)(0,4)(0,1)(1,4)(4,4)
+E	((a|ab)(bcd|c))(d*)	abcd	(0,4)(0,4)(0,1)(1,4)(4,4)
+E	((ab|a)(c|bcd))(d*)	abcd	(0,4)(0,4)(0,1)(1,4)(4,4)
+E	((ab|a)(bcd|c))(d*)	abcd	(0,4)(0,4)(0,1)(1,4)(4,4)
+E	(a|ab)((c|bcd)(d*))	abcd	(0,4)(0,2)(2,4)(2,3)(3,4)
+E	(a|ab)((bcd|c)(d*))	abcd	(0,4)(0,2)(2,4)(2,3)(3,4)
+E	(ab|a)((c|bcd)(d*))	abcd	(0,4)(0,2)(2,4)(2,3)(3,4)
+E	(ab|a)((bcd|c)(d*))	abcd	(0,4)(0,2)(2,4)(2,3)(3,4)
+E	(a*)(b|abc)		abc	(0,3)(0,0)(0,3)
+E	(a*)(abc|b)		abc	(0,3)(0,0)(0,3)
+E	((a*)(b|abc))(c*)	abc	(0,3)(0,3)(0,0)(0,3)(3,3)
+E	((a*)(abc|b))(c*)	abc	(0,3)(0,3)(0,0)(0,3)(3,3)
+E	(a*)((b|abc)(c*))	abc	(0,3)(0,1)(1,3)(1,2)(2,3)
+E	(a*)((abc|b)(c*))	abc	(0,3)(0,1)(1,3)(1,2)(2,3)
+E	(a*)(b|abc)		abc	(0,3)(0,0)(0,3)
+E	(a*)(abc|b)		abc	(0,3)(0,0)(0,3)
+E	((a*)(b|abc))(c*)	abc	(0,3)(0,3)(0,0)(0,3)(3,3)
+E	((a*)(abc|b))(c*)	abc	(0,3)(0,3)(0,0)(0,3)(3,3)
+E	(a*)((b|abc)(c*))	abc	(0,3)(0,1)(1,3)(1,2)(2,3)
+E	(a*)((abc|b)(c*))	abc	(0,3)(0,1)(1,3)(1,2)(2,3)
+E	(a|ab)			ab	(0,2)(0,2)
+E	(ab|a)			ab	(0,2)(0,2)
+E	(a|ab)(b*)		ab	(0,2)(0,2)(2,2)
+E	(ab|a)(b*)		ab	(0,2)(0,2)(2,2)
diff --git a/leftassoc.dat b/leftassoc.dat
new file mode 100644
index 0000000..9c068c6
--- /dev/null
+++ b/leftassoc.dat
@@ -0,0 +1,16 @@
+NOTE	left-assoc:pass-all right-assoc:pass-none : 2002-04-29
+
+E	(a|ab)(c|bcd)(d*)	abcd	(0,4)(0,1)(1,4)(4,4)
+E	(a|ab)(bcd|c)(d*)	abcd	(0,4)(0,1)(1,4)(4,4)
+E	(ab|a)(c|bcd)(d*)	abcd	(0,4)(0,1)(1,4)(4,4)
+E	(ab|a)(bcd|c)(d*)	abcd	(0,4)(0,1)(1,4)(4,4)
+
+E	(a*)(b|abc)(c*)		abc	(0,3)(0,0)(0,3)(3,3)
+E	(a*)(abc|b)(c*)		abc	(0,3)(0,0)(0,3)(3,3)
+E	(a*)(b|abc)(c*)		abc	(0,3)(0,0)(0,3)(3,3)
+E	(a*)(abc|b)(c*)		abc	(0,3)(0,0)(0,3)(3,3)
+
+E	(a|ab)(c|bcd)(d|.*)	abcd	(0,4)(0,1)(1,4)(4,4)
+E	(a|ab)(bcd|c)(d|.*)	abcd	(0,4)(0,1)(1,4)(4,4)
+E	(ab|a)(c|bcd)(d|.*)	abcd	(0,4)(0,1)(1,4)(4,4)
+E	(ab|a)(bcd|c)(d|.*)	abcd	(0,4)(0,1)(1,4)(4,4)
diff --git a/man/man1/testregex.html b/man/man1/testregex.html
new file mode 100644
index 0000000..fe7f321
--- /dev/null
+++ b/man/man1/testregex.html
@@ -0,0 +1,142 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<HTML>
+<HEAD>
+<TITLE>testregex man document</TITLE>
+</HEAD>
+<BODY bgcolor=white>
+<PRE>
+NAME
+  testregex - regex(3) test harness
+
+SYNOPSIS
+  testregex [ options ]
+
+DESCRIPTION
+  testregex reads regex(3) test specifications, one per line, from the
+  standard input and writes one output line for each failed test. A
+  summary line is written after all tests are done. Each successful
+  test is run again with REG_NOSUB. Unsupported features are noted
+  before the first test, and tests requiring these features are
+  silently ignored.
+
+OPTIONS
+  -c	catch signals and non-terminating calls
+  -e	ignore error return mismatches
+  -h	list help on standard error
+  -n	do not repeat successful tests with regnexec()
+  -o	ignore match[] overrun errors
+  -p	ignore negative position mismatches
+  -s	use stack instead of malloc
+  -x	do not repeat successful tests with REG_NOSUB
+  -v	list each test line
+  -A	list failed test lines with actual answers
+  -B	list all test lines with actual answers
+  -F	list failed test lines
+  -P	list passed test lines
+  -S	output one summary line
+
+INPUT FORMAT
+  Input lines may be blank, a comment beginning with #, or a test
+  specification. A specification is five fields separated by one
+  or more tabs. NULL denotes the empty string and NIL denotes the
+  0 pointer.
+
+  Field 1: the regex(3) flags to apply, one character per REG_feature
+  flag. The test is skipped if REG_feature is not supported by the
+  implementation. If the first character is not [BEASKLP] then the
+  specification is a global control line. One or more of [BEASKLP] may be
+  specified; the test will be repeated for each mode.
+
+    B 	basic			BRE	(grep, ed, sed)
+    E 	REG_EXTENDED		ERE	(egrep)
+    A	REG_AUGMENTED		ARE	(egrep with negation)
+    S	REG_SHELL		SRE	(sh glob)
+    K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)
+    L	REG_LITERAL		LRE	(fgrep)
+
+    a	REG_LEFT|REG_RIGHT	implicit ^...$
+    b	REG_NOTBOL		lhs does not match ^
+    c	REG_COMMENT		ignore space and #...\n
+    d	REG_SHELL_DOT		explicit leading . match
+    e	REG_NOTEOL		rhs does not match $
+    f	REG_MULTIPLE		multiple \n separated patterns
+    g	FNM_LEADING_DIR		testfnmatch only -- match until /
+    h	REG_MULTIREF		multiple digit backref
+    i	REG_ICASE		ignore case
+    j	REG_SPAN		. matches \n
+    k	REG_ESCAPE		\ to escape [...] delimiter
+    l	REG_LEFT		implicit ^...
+    m	REG_MINIMAL		minimal match
+    n	REG_NEWLINE		explicit \n match
+    o	REG_ENCLOSED		(|&) magic inside [@|&](...)
+    p	REG_SHELL_PATH		explicit / match
+    q	REG_DELIMITED		delimited pattern
+    r	REG_RIGHT		implicit ...$
+    s	REG_SHELL_ESCAPED	\ not special
+    t	REG_MUSTDELIM		all delimiters must be specified
+    u	standard unspecified behavior -- errors not counted
+    v	REG_CLASS_ESCAPE	\ special inside [...]
+    w	REG_NOSUB		no subexpression match array
+    x	REG_LENIENT		let some errors slide
+    y	REG_LEFT		regexec() implicit ^...
+    z	REG_NULL		NULL subexpressions ok
+    $	                        expand C \c escapes in fields 2 and 3
+    /	                        field 2 is a regsubcomp() expression
+    =	                        field 3 is a regdecomp() expression
+
+  Field 1 control lines:
+
+    C		set LC_COLLATE and LC_CTYPE to locale in field 2
+
+    ?test ...	output field 5 if passed and != EXPECTED, silent otherwise
+    &test ...	output field 5 if current and previous passed
+    |test ...	output field 5 if current passed and previous failed
+    ; ...	output field 2 if previous failed
+    {test ...	skip if failed until }
+    }		end of skip
+
+    : comment		comment copied as output NOTE
+    :comment:test	:comment: ignored
+    N[OTE] comment	comment copied as output NOTE
+    T[EST] comment	comment
+
+    number		use number for nmatch (20 by default)
+
+  Field 2: the regular expression pattern; SAME uses the pattern from
+    the previous specification.
+
+  Field 3: the string to match.
+
+  Field 4: the test outcome. This is either one of the posix error
+    codes (with REG_ omitted) or the match array, a list of (m,n)
+    entries with m and n being first and last+1 positions in the
+    field 3 string, or NULL if REG_NOSUB is in effect and success
+    is expected. BADPAT is acceptable in place of any regcomp(3)
+    error code. The match[] array is initialized to (-2,-2) before
+    each test. All array elements from 0 to nmatch-1 must be specified
+    in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
+    Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
+    matched (?{...}) expression, where x is the text enclosed by {...},
+    o is the expression ordinal counting from 1, and n is the length of
+    the unmatched portion of the subject string. If x starts with a
+    number then that is the return value of re_execf(), otherwise 0 is
+    returned.
+
+  Field 5: optional comment appended to the report.
+
+CAVEAT
+    If a regex implementation misbehaves with memory then all bets are off.
+
+CONTRIBUTORS
+  Glenn Fowler    gsf@research.att.com        (ksh strmatch, regex extensions)
+  David Korn      dgk@research.att.com        (ksh glob matcher)
+  Doug McIlroy    mcilroy@dartmouth.edu       (ast regex/testre in C++)
+  Tom Lord        lord@regexps.com            (rx tests)
+  Henry Spencer   henry@zoo.toronto.edu       (original public regex)
+  Andrew Hume     andrew@research.att.com     (gre tests)
+  John Maddock    John_Maddock@compuserve.com (regex++ tests)
+  Philip Hazel    ph10@cam.ac.uk              (pcre tests)
+  Ville Laurikari vl@iki.fi                   (libtre tests)
+</PRE>
+</BODY>
+</HTML>
diff --git a/nullsubexpr.dat b/nullsubexpr.dat
new file mode 100644
index 0000000..c73d8f0
--- /dev/null
+++ b/nullsubexpr.dat
@@ -0,0 +1,73 @@
+NOTE	null subexpression matches : 2002-06-06
+
+E	(a*)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)+		a		(0,1)(0,1)
+E	SAME		x		NOMATCH
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+
+E	([a]*)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([a]*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([^b]*)*	a		(0,1)(0,1)
+E	SAME		b		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaab		(0,6)(0,6)
+E	([ab]*)*	a		(0,1)(0,1)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		ababab		(0,6)(0,6)
+E	SAME		bababa		(0,6)(0,6)
+E	SAME		b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+E	SAME		aaaabcde	(0,5)(0,5)
+E	([^a]*)*	b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+E	SAME		aaaaaa		(0,0)(0,0)
+E	([^ab]*)*	ccccxx		(0,6)(0,6)
+E	SAME		ababab		(0,0)(0,0)
+
+E	((z)+|a)*	zabcde		(0,2)(1,2)
+
+{E	a+?		aaaaaa		(0,1)	no *? +? minimal match ops
+E	(a)		aaa		(0,1)(0,1)
+E	(a*?)		aaa		(0,0)(0,0)
+E	(a)*?		aaa		(0,0)
+E	(a*?)*?		aaa		(0,0)
+}
+
+B	\(a*\)*\(x\)		x	(0,1)(0,0)(0,1)
+B	\(a*\)*\(x\)		ax	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)		axa	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)\(\1\)	x	(0,1)(0,0)(0,1)(1,1)
+B	\(a*\)*\(x\)\(\1\)	ax	(0,2)(1,1)(1,2)(2,2)
+B	\(a*\)*\(x\)\(\1\)	axa	(0,3)(0,1)(1,2)(2,3)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axax	(0,4)(0,1)(1,2)(2,3)(3,4)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axxa	(0,3)(1,1)(1,2)(2,2)(2,3)
+
+E	(a*)*(x)		x	(0,1)(0,0)(0,1)
+E	(a*)*(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)*(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*)+(x)		x	(0,1)(0,0)(0,1)
+E	(a*)+(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)+(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*){2}(x)		x	(0,1)(0,0)(0,1)
+E	(a*){2}(x)		ax	(0,2)(1,1)(1,2)
+E	(a*){2}(x)		axa	(0,2)(1,1)(1,2)
diff --git a/re-assoc.html b/re-assoc.html
new file mode 100644
index 0000000..2bbc14b
--- /dev/null
+++ b/re-assoc.html
@@ -0,0 +1,64 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<HTML>
+<HEAD>
+<META name="generator" content="mm2html (AT&amp;T Labs Research) 2005-10-15">
+<META name="keywords" content="regex catenation associativity tests">
+<TITLE> ../re/re-assoc.mm mm document </TITLE>
+<META name="author" content="gsf">
+</HEAD>
+<BODY bgcolor=white link=slateblue vlink=teal >
+<TABLE border=0 align=center width=96%>
+<TBODY><TR><TD valign=top align=left>
+<!--INDEX--><!--/INDEX-->
+<P>
+<HR>
+<CENTER>
+<H3><CENTER><FONT color=red><FONT face=courier>regex catenation associativity tests</FONT></FONT></CENTER></H3>
+<BR>Glenn Fowler <SMALL>&lt;<A href=mailto:gsf@research.att.com>gsf@research.att.com</A>&gt;</SMALL>
+<P><I>AT&amp;T Labs Research - Florham Park NJ</I>
+</CENTER>
+<P><HR><P>
+The
+<STRONG>regex</STRONG>
+tests in
+{
+	<A href="http://web.archive.org/web/20080724204655id_/http://www.research.att.com/~gsf/testregex/leftassoc.dat">leftassoc.dat</A>
+	<A href="http://web.archive.org/web/20080724204655id_/http://www.research.att.com/~gsf/testregex/rightassoc.dat">rightassoc.dat</A>
+	<A href="http://web.archive.org/web/20080724204655id_/http://www.research.att.com/~gsf/testregex/forcedassoc.dat">forcedassoc.dat</A>
+}
+exercise the associativity of catenation.
+<P>
+<HR>
+<TABLE border=0 align=center width=96%>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right><A href="mailto:gsf@research.att.com?subject= ../re/re-assoc.mm mm document">Glenn Fowler</A></TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Information and Software Systems Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>AT&amp;T Labs Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Florham Park NJ</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>August 04, 2002</TD>
+</TR>
+</TABLE>
+<P>
+
+</TD></TR></TBODY></TABLE>
+
+</BODY>
+</HTML>
diff --git a/re-categorize.html b/re-categorize.html
new file mode 100644
index 0000000..eb473d6
--- /dev/null
+++ b/re-categorize.html
@@ -0,0 +1,209 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<HTML>
+<HEAD>
+<META name="generator" content="mm2html (AT&amp;T Labs Research) 2005-10-15">
+<META name="keywords" content="regex implementation categorization">
+<TITLE> ../re/re-categorize.mm mm document </TITLE>
+<META name="author" content="gsf">
+</HEAD>
+<BODY bgcolor=white link=slateblue vlink=teal >
+<TABLE border=0 align=center width=96%>
+<TBODY><TR><TD valign=top align=left>
+<!--INDEX--><!--/INDEX-->
+<P>
+<HR>
+<CENTER>
+<H3><CENTER><FONT color=red><FONT face=courier>regex implementation categorization</FONT></FONT></CENTER></H3>
+<BR>Glenn Fowler <SMALL>&lt;<A href=mailto:gsf@research.att.com>gsf@research.att.com</A>&gt;</SMALL>
+<P><I>AT&amp;T Labs Research - Florham Park NJ</I>
+</CENTER>
+<P><HR><P>
+The
+<STRONG>regex</STRONG>
+tests in
+	<A href="http://web.archive.org/web/20080726034626id_/http://www.research.att.com/~gsf/testregex/categorize.dat">categorize.dat</A>
+attempt to categorize
+<STRONG>regex</STRONG>
+implementations.
+The tests do not address internationalization.
+All implementations report the leftmost match; this is omitted from the table.
+<P></P><TABLE border=0 frame=void rules=none width=100%><TBODY><TR><TD>
+<TABLE align=center bgcolor=papayawhip border=0 bordercolor=white cellpadding=2 cellspacing=2 frame=void rules=none >
+<TBODY>
+<TR><TD align=center>LABEL&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;ASSOC&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;SUBEXPR&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;REP_LONGEST&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;BUGS</TD></TR>
+<TR><TD align=center>
+A&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;precedence&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;-</TD></TR>
+<TR><TD align=center>
+B&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-null&nbsp;&nbsp;repeat-short&nbsp;&nbsp;repeat-artifact-nomatch</TD></TR>
+<TR><TD align=center>
+D&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;-</TD></TR>
+<TR><TD align=center>
+G&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;alternation-order&nbsp;&nbsp;repeat-null&nbsp;&nbsp;repeat-artifact&nbsp;&nbsp;repeat-artifact-nomatch</TD></TR>
+<TR><TD align=center>
+H&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;alternation-order&nbsp;&nbsp;nomatch-match&nbsp;&nbsp;repeat-null&nbsp;&nbsp;repeat-artifact&nbsp;&nbsp;repeat-artifact-nomatch</TD></TR>
+<TR><TD align=center>
+I&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-any&nbsp;&nbsp;repeat-short&nbsp;&nbsp;repeat-artifact-nomatch</TD></TR>
+<TR><TD align=center>
+J&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;precedence&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;last&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;nomatch-match&nbsp;&nbsp;repeat-artifact&nbsp;&nbsp;repeat-artifact-nomatch&nbsp;&nbsp;subexpression-first</TD></TR>
+<TR><TD align=center>
+M&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;precedence&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;last&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;range-null&nbsp;&nbsp;repeat-artifact&nbsp;&nbsp;repeat-artifact-nomatch&nbsp;&nbsp;subexpression-first</TD></TR>
+<TR><TD align=center>
+O&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-null&nbsp;&nbsp;repeat-short&nbsp;&nbsp;repeat-artifact-nomatch</TD></TR>
+<TR><TD align=center>
+P&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;alternation-order&nbsp;&nbsp;first-match&nbsp;&nbsp;repeat-null&nbsp;&nbsp;repeat-artifact</TD></TR>
+<TR><TD align=center>
+R&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;left&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;precedence&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;last&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;-</TD></TR>
+<TR><TD align=center>
+S&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-null&nbsp;&nbsp;repeat-short&nbsp;&nbsp;repeat-artifact-nomatch</TD></TR>
+<TR><TD align=center>
+T&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;left&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;precedence&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;last&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;-</TD></TR>
+<TR><TD align=center>
+U&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;precedence&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-null&nbsp;&nbsp;subexpression-first</TD></TR>
+<TR><TD align=center>
+darwin.ppc&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-null&nbsp;&nbsp;repeat-short</TD></TR>
+<TR><TD align=center>
+freebsd.i386&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-null&nbsp;&nbsp;repeat-short</TD></TR>
+<TR><TD align=center>
+hp.pa&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-artifact</TD></TR>
+<TR><TD align=center>
+ibm.risc&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;alternation-order&nbsp;&nbsp;nomatch-match&nbsp;&nbsp;repeat-artifact&nbsp;&nbsp;repeat-artifact-nomatch</TD></TR>
+<TR><TD align=center>
+linux.i386&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;alternation-order&nbsp;&nbsp;repeat-artifact&nbsp;&nbsp;repeat-null</TD></TR>
+<TR><TD align=center>
+sgi.mips3&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-short</TD></TR>
+<TR><TD align=center>
+sol8.sun4&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;grouping&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;alternation-order&nbsp;&nbsp;nomatch-match&nbsp;&nbsp;repeat-artifact</TD></TR>
+<TR><TD align=center>
+unixware.i386&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;right&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;precedence&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;first&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;repeat-null&nbsp;&nbsp;subexpression-first</TD></TR>
+</TBODY></TABLE></TD></TR></TBODY></TABLE>
+<P>
+The categories are:
+<DL COMPACT>
+<DL COMPACT>
+<DT><STRONG>LABEL</STRONG><DD>
+The implementation label from
+	<A href="http://web.archive.org/web/20080726034626id_/http://www.research.att.com/~gsf/testregex/">testregex</A>.
+<DT><STRONG>ASSOC</STRONG><DD>
+Subpattern (or atom) associativity: either
+<STRONG>left</STRONG>
+or
+<STRONG>right</STRONG>.
+The subexpression match rule in the rationale requires
+<STRONG>right</STRONG>
+for expressions where each concatenated part is a subexpression.
+There is no definition for
+<EM>subpattern</EM>,
+but it would be inconsistent for any definition to require different
+associativity than that for subexpressions.
+Some claim that the BRE and ERE grammars specify
+<STRONG>left</STRONG>
+associativity, but this interpretation disregards
+the subexpression match rule in the rationale.
+The grammar can also be interpreted to support
+<STRONG>right</STRONG>
+associativity, and this interpretation is in accord with the rationale.
+<DT><STRONG>SUBEXPR</STRONG><DD>
+Subexpression semantics:
+<STRONG>precedence</STRONG>
+if subexpressions can override the default associativity;
+<STRONG>grouping</STRONG>
+if subexpressions are for repetition and
+<STRONG>regmatch_t</STRONG>
+grouping only.
+The subexpression match rule in the rationale requires
+<STRONG>precedence</STRONG>.
+<DT><STRONG>REP_LONGEST</STRONG><DD>
+How repeated subexpressions that match more than once are handled:
+<STRONG>first</STRONG>
+if the longest possible matches occur first;
+<STRONG>last</STRONG>
+if the longest possible matches occur last;
+<STRONG>unknown</STRONG>
+otherwise.
+The subexpression match rule in the rationale requires
+<STRONG>first</STRONG>.
+<DT><STRONG>BUGS</STRONG><DD>
+Miscellaneous bugs (see
+	<A href="http://web.archive.org/web/20080726034626id_/http://www.research.att.com/~gsf/testregex/categorize.dat">categorize.dat</A>
+for specific examples):
+<DL COMPACT>
+<DL COMPACT>
+<DT><STRONG>alternation-order</STRONG><DD>
+A change in the order of subexpression alternation operands,
+<EM>not involved in a tie</EM>,
+changes
+<STRONG>regmatch_t</STRONG>
+values.
+Some implementations with this bug can be coaxed into missing the
+overall longest match.
+<DT><STRONG>first-match</STRONG><DD>
+The first of the leftmost matches, instead of the longest of the
+leftmost matches, is returned.
+<DT><STRONG>nomatch-match</STRONG><DD>
+A back-reference to a
+<STRONG>regmatch_t</STRONG>
+(-1,-1) value is treated as matching.
+<DT><STRONG>range-null</STRONG><DD>
+A range-repeated subexpression that matches null does not report the match
+at offset (0,0).
+<DT><STRONG>repeat-artifact</STRONG><DD>
+A
+<STRONG>regmatch_t</STRONG>
+value is reported for a repeated match that is not the last match.
+<DT><STRONG>repeat-artifact-nomatch</STRONG><DD>
+To prevent not matching,
+a
+<STRONG>regmatch_t</STRONG>
+value is reported for a repeated match that is not the last match.
+<DT><STRONG>repeat-null</STRONG><DD>
+A repeated subexpression matches the null string even though it is not
+the only match and is not necessary to satisfy the exact or minimum
+number of occurrences for an interval expression.
+<DT><STRONG>repeat-short</STRONG><DD>
+Incorrect
+<STRONG>regmatch_t</STRONG>
+values for a repeated subexpression.
+This may be a variant of
+<STRONG>repeat-artifact</STRONG>.
+<DT><STRONG>subexpression-first</STRONG><DD>
+A subexpression match takes precedence over a subpattern
+to its left.
+</DL>
+</DL>
+</DL>
+</DL>
+<P>
+<HR>
+<TABLE border=0 align=center width=96%>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right><A href="mailto:gsf@research.att.com?subject= ../re/re-categorize.mm mm document">Glenn Fowler</A></TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Information and Software Systems Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>AT&amp;T Labs Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Florham Park NJ</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>June 01, 2004</TD>
+</TR>
+</TABLE>
+<P>
+
+</TD></TR></TBODY></TABLE>
+
+</BODY>
+</HTML>
diff --git a/re-interpretation.html b/re-interpretation.html
new file mode 100644
index 0000000..cb603b1
--- /dev/null
+++ b/re-interpretation.html
@@ -0,0 +1,997 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<HTML>
+<HEAD>
+<META name="generator" content="mm2html (AT&amp;T Research) 2010-09-10">
+<META name="keywords" content="regex regular expression standard interpretation">
+<TITLE> ../re/re-interpretation.mm mm document </TITLE>
+<META name="author" content="gsf">
+</HEAD>
+<BODY bgcolor=white link=slateblue vlink=teal >
+<TABLE border=0 align=center width=96%>
+<TBODY><TR><TD valign=top align=left>
+<!--INDEX--><!--/INDEX-->
+<B><FONT size=-1 face="verdana,arial,helvetica,geneva,sans-serif">
+<TABLE align=center cellpadding=2 border=4 bgcolor=lightgrey><TR>
+<TD><A href="re-interpretation.html#Abstract">Abstract</A></TD>
+<TD><A href="re-interpretation.html#Background">Background</A></TD>
+<TD><A href="re-interpretation.html#Notation">Notation</A></TD>
+<TD><A href="re-interpretation.html#regex Glossary">regex Glossary</A></TD>
+<TD><A href="re-interpretation.html#A subexpression is ">A subexpression is </A></TD>
+<TD><A href="re-interpretation.html#A subpattern is ">A subpattern is </A></TD>
+<TD><A href="re-interpretation.html#The Dark Corners ">The Dark Corners </A></TD>
+<TD><A href="re-interpretation.html#Conclusion">Conclusion</A></TD>
+</TR></TABLE>
+</FONT></B>
+<P>
+<HR>
+<CENTER>
+<H3><CENTER><FONT color=red><FONT face=courier>An Interpretation of the POSIX regex Standard</FONT></FONT></CENTER></H3>
+<BR>Glenn Fowler <SMALL>&lt;<A href=mailto:gsf@research.att.com>gsf@research.att.com</A>&gt;</SMALL>
+<P><I>AT&amp;T Research - Florham Park NJ</I>
+</CENTER>
+<P>
+<CENTER><FONT color=red><FONT face=courier><H3 align=center><A name="Abstract">Abstract</A></H3></FONT></FONT></CENTER>
+Many passages in the POSIX
+<STRONG>regex</STRONG>
+standard seem to be open for interpretation.
+Differences between several published
+	<A href="http://www.research.att.com/~gsf/testregex/" target=_top>implementations</A>
+of the
+<STRONG>regex</STRONG>
+API bear this out.
+Instead of relegating these differences to the
+<EM>undefined behavior</EM>
+bucket, this paper proposes a resolution to each
+by direct application of the standard text.
+
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="Background">Background</A></H3></FONT></FONT></CENTER>
+The POSIX
+<STRONG>regex</STRONG>
+standard is spread across four documents:
+<P></P><TABLE border=0 frame=void rules=none width=100%><TBODY><TR><TD>
+<TABLE align=center bgcolor=papayawhip border=0 bordercolor=white cellpadding=2 cellspacing=2 >
+<TBODY>
+<TR><TD align=right>
+glossary&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;G&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;<A href="http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap03.html" target=_top>http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap03.html</A></TD></TR>
+<TR><TD align=right>
+api&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;A&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;<A href="http://www.opengroup.org/onlinepubs/007904975/functions/regcomp.html" target=_top>http://www.opengroup.org/onlinepubs/007904975/functions/regcomp.html</A></TD></TR>
+<TR><TD align=right>
+definition&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;D&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;<A href="http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap09.html" target=_top>http://www.opengroup.org/onlinepubs/007904975/basedefs/xbd_chap09.html</A></TD></TR>
+<TR><TD align=right>
+rationale&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;R&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;<A href="http://www.opengroup.org/onlinepubs/007904975/xrat/xbd_chap09.html" target=_top>http://www.opengroup.org/onlinepubs/007904975/xrat/xbd_chap09.html</A></TD></TR>
+</TBODY></TABLE></TD></TR></TBODY></TABLE>
+<P>
+It describes
+<STRONG>BRE</STRONG>s
+(basic regular expressions, a.k.a.,
+<NOBR><A href="http://web.archive.org/~gsf/man/man1/grep.html"><STRONG>grep</STRONG></A>(1)</NOBR>
+style) and
+<STRONG>ERE</STRONG>s
+(extended regular expressions, a.k.a.,
+<NOBR><A href="http://web.archive.org/~gsf/man/man1/egrep.html"><STRONG>egrep</STRONG></A>(1)</NOBR>
+style)
+and how an RE of each type matches subject strings.
+The standard also provides an API:
+<NOBR><A href="http://web.archive.org/~gsf/man/man3/regcomp.html"><STRONG>regcomp</STRONG></A>(3)</NOBR>
+for compiling an RE, and
+<NOBR><A href="http://web.archive.org/~gsf/man/man3/regexec.html"><STRONG>regexec</STRONG></A>(3)</NOBR>
+for matching a compiled RE against a subject string.
+The
+<STRONG>regexec</STRONG>
+API
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+   int regexec(const regex_t* restrict preg, const char* restrict string,
+               size_t nmatch, regmatch_t pmatch&#0091;restrict&#0093;, int eflags);
+</PRE>
+</DIV>
+is at the center of multiple, conflicting interpretations of the standard.
+These interpretations differ on the setting of the
+<TT>pmatch&#0091;&#0093;</TT>
+array for index values &gt; 0.
+This note presents examples that demonstrate interpretation conflicts,
+and then provides standard references that,
+<EM>when taken as a whole</EM>,
+resolve the conflicts.
+
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="Notation">Notation</A></H3></FONT></FONT></CENTER>
+Standard references use the notation
+&#0091;<EM>document</EM>:<EM>begin</EM>&#0091;-<EM>end</EM>&#0093;&#0093;
+where
+<EM>document</EM>
+is the document letter, { A D G R }, from the table above,
+<EM>begin</EM>
+is the beginning line number, and
+<EM>end</EM>
+is the ending line number.
+Line numbers are taken from the 2001 X/Open printing.
+Unfortunately the online links do not display line numbers.
+For example, &#0091;A:37179-37180&#0093; is the reference for the
+<STRONG>regexec</STRONG>
+API prototype above.
+<P>
+Example patterns, subject strings, and
+<TT>pmatch&#0091;&#0093;</TT>
+array values use the regression test notation of
+	<A href="http://www.research.att.com/~gsf/testregex/" target=_top>testregex</A>.
+You can download the source and compile it against your favorite regex
+implementation.
+All of the examples in this note have been placed in the file
+	<A href="http://www.research.att.com/~gsf/testregex/interpretation.dat" target=_top>interpretation.dat</A>;
+you can download this file and use it as input to
+<STRONG>testregex</STRONG>.
+For example, the
+<STRONG>testregex</STRONG>
+input
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#01:E	a+			xaax	(1,3)
+</PRE>
+</DIV>
+specifies that the ERE pattern "a+" matched against the
+subject string "xaax" yields
+<TT>pmatch&#0091;0&#0093;.rm_so==1</TT>
+and
+<TT>pmatch&#0091;0&#0093;.rm_eo==3</TT>.
+The example is labeled RE#01 for indexing and referencing.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#02:B	.&#0092;(a*&#0092;).		xaax	(0,4)(1,3)
+</PRE>
+</DIV>
+specifies that the BRE pattern ".&#0092;(a*&#0092;)." matched against the subject
+string "xaax" yields
+<TT>pmatch&#0091;0&#0093;.rm_so==0</TT>,
+<TT>pmatch&#0091;0&#0093;.rm_eo==4</TT>,
+<TT>pmatch&#0091;1&#0093;.rm_so==1</TT>,
+<TT>pmatch&#0091;1&#0093;.rm_eo==3</TT>.
+(?,?) denotes
+<TT>rm_so</TT>
+and
+<TT>rm_eo</TT>
+values of -1, i.e., a non-match.
+The first field allows additional flags that exercise all of the
+<STRONG>REG_*</STRONG>
+<STRONG>regcomp</STRONG>
+and
+<STRONG>regexec</STRONG>
+flags; see
+<NOBR><A href="man/man1/testregex.html"><STRONG>testregex</STRONG></A>(1)</NOBR>
+or
+<STRONG>testregex --man</STRONG>
+for details.
+Note that
+<STRONG>tab</STRONG>
+is the field separator in the
+<STRONG>testregex</STRONG>
+syntax; if you mouse snarf then make sure that
+<STRONG>tabs</STRONG>
+are preserved.
+
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="regex Glossary">regex Glossary</A></H3></FONT></FONT></CENTER>
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT>&#0091;G:41&#0093;<STRONG>Basic Regular Expression (BRE)</STRONG><DD>
+A regular expression used by the majority of utilities that select strings
+from a set of character strings. 
+<DT>&#0091;G:148&#0093;<STRONG>Entire Regular Expression</STRONG><DD>
+The concatenated set of one or more basic regular expressions or extended
+regular expressions that make up the pattern specified for string selection. 
+<DT>&#0091;G:158&#0093;<STRONG>Extended Regular Expression (ERE)</STRONG><DD>
+A regular expression that is an alternative to the Basic Regular
+Expression using a more extensive syntax, occasionally used by some utilities. 
+<DT>&#0091;G:269&#0093;<STRONG>Pattern</STRONG><DD>
+A sequence of characters used either with regular expression notation or for
+pathname expansion, as a means of selecting various character strings or
+pathnames, respectively. 
+<DT>&#0091;G:316&#0093;<STRONG>Regular Expression</STRONG><DD>
+A pattern that selects specific strings from a set of character strings. 
+</DL>
+</DIV>
+
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="A subexpression is ">A subexpression is </A></H3></FONT></FONT></CENTER>
+The
+<STRONG>regex</STRONG>
+standard is surprisingly cavalier with terminology:
+some terms are used interchangeably, some are used in a general context
+in one section and a specific context in another, and some are
+used without any definition whatsoever.
+Acutely subject to this abuse are:
+<EM>RE</EM>,
+<EM>pattern</EM>,
+<EM>subpattern</EM>,
+<EM>expression</EM>,
+and
+<EM>subexpression</EM>.
+In particular,
+<EM>subpattern</EM>
+and
+<EM>subexpression</EM>
+are central to the description of the matching algorithm and how
+<TT>pmatch&#0091;&#0093;</TT>
+is assigned.
+Any interpretation of the
+<STRONG>regex</STRONG>
+standard involving these terms, absent a precise and accurate definition
+for each, is useless.
+<P>
+<EM>subexpression</EM>
+appears 70 times, and each reference is in the context of parenthesis grouping:
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT>&#0091;D:5909-5911&#0093;<DD>
+For example, matching the BRE "&#0092;(.*&#0092;).*" against "abcdef" , the
+subexpression "(&#0092;1)" is "abcdef" , and matching the BRE
+"&#0092;(a*&#0092;)*" against "bc" , the subexpression "(&#0092;1)" is the null
+string.
+<DT>&#0091;D:5984-5988&#0093;<DD>
+The asterisk shall be special except when used: As the first
+character of a subexpression (after an initial '^' , if any);
+<DT>&#0091;D:6094-6097&#0093;<DD>
+A subexpression can be defined within a BRE by enclosing it
+between the character pairs "&#0092;(" and "&#0092;)" . Subexpressions can
+be arbitrarily nested.
+<DT>&#0091;D:6100-6109&#0093;<DD>
+The character 'n' shall be a digit from 1 through 9, specifying
+the nth subexpression (the one that begins with the nth "&#0092;("
+from the beginning of the pattern and ends with the
+corresponding paired "&#0092;)" ). The expression is invalid if less
+than n subexpressions precede the '&#0092;n' . For example, the
+expression "&#0092;(.*&#0092;)&#0092;1$" matches a line consisting of two
+adjacent appearances of the same string, and the expression
+"&#0092;(a&#0092;)*&#0092;1" fails to match 'a' . When the referenced
+subexpression matched more than one string, the back-referenced
+expression shall refer to the last matched string. If the
+subexpression referenced by the back-reference matches more
+than one string because of an asterisk ( '*' ) or an interval
+expression (see item (5)), the back-reference shall match the
+last (rightmost) of these strings.
+<DT>&#0091;D:6110-6112&#0093;<DD>
+When a BRE matching a single character, a subexpression, or a
+back-reference is followed by the special character asterisk ('*' ),
+together with that asterisk it shall match what zero or
+more consecutive occurrences of the BRE would match.
+<DT>&#0091;D:6114-6117&#0093;<DD>
+When a BRE matching a single character, a subexpression, or a
+back-reference is followed by an interval expression of the
+format "&#0092;{m&#0092;}" ,
+"&#0092;{m,&#0092;}" , or
+"&#0092;{m,n&#0092;}" ,
+together with that interval expression it shall match what
+repeated consecutive occurrences of the BRE would match.
+<DT>&#0091;D:6127-6129&#0093;<DD>
+A subexpression repeated by an asterisk ('*') or an interval expression
+shall not match a null expression unless this is the only match for the
+repetition or it is necessary to satisfy the exact or minimum number of
+occurrences for the interval expression.
+<DT>&#0091;D:6136&#0093;<DD>
+Subexpressions/back-references &#0092;(&#0092;) &#0092;n
+<DT>&#0091;D:6145-6151&#0093;<DD>
+The implementation may treat the circumflex as an anchor when
+used as the first character of a subexpression. The circumflex
+shall anchor the
+expression (or optionally subexpression) to the beginning of a
+string; only sequences starting at the first character of a
+string shall be matched by the BRE. For example, the BRE "^ab"
+matches "ab" in the string "abcdef" , but fails to match in the
+string "cdefab" . The BRE "&#0092;(^ab&#0092;)" may match the former
+string. A portable BRE shall escape a leading circumflex in a
+subexpression to match a literal circumflex.
+<DT>&#0091;D:6152-6156&#0093;<DD>
+A dollar sign ( '$' ) shall be an anchor when used as the last
+character of an entire BRE. The implementation may treat a
+dollar sign as an anchor when used as the last character of a
+subexpression. The dollar sign shall anchor the expression (or
+optionally subexpression) to the end of the string being matched;
+the dollar sign can be said to match the end-of-string following
+the last character.
+<DT>&#0091;D:6265-6270&#0093;<DD>
+A circumflex ( '^' ) outside a bracket expression shall anchor
+the expression or subexpression it begins to the beginning of a
+string; such an expression or subexpression can match only a
+sequence starting at the first character of a string. For
+example, the EREs "^ab" and "(^ab)" match "ab" in the string
+"abcdef" , but fail to match in the string "cdefab" , and the
+ERE "a^b" is valid, but can never match because the 'a'
+prevents the expression "^b" from matching starting at the
+first character.
+<DT>&#0091;D:6271-6276&#0093;<DD>
+A dollar sign ( '$' ) outside a bracket expression shall anchor
+the expression or subexpression it ends to the end of a string;
+such an expression or subexpression can match only a sequence
+ending at the last character of a string. For example, the EREs
+"ef$" and "(ef$)" match "ef" in the string "abcdef" , but fail
+to match in the string "cdefab" , and the ERE "e$f" is valid,
+but can never match because the 'f' prevents the expression
+"e$" from matching ending at the last character.
+<DT>&#0091;R:2359-2370&#0093;<DD>
+It is possible to determine what strings correspond to
+subexpressions by recursively applying the leftmost longest
+rule to each subexpression, but only with the proviso that the
+overall match is leftmost longest. For example, matching
+"&#0092;(ac*&#0092;)c*d&#0091;ac&#0093;*&#0092;1" against acdacaaa matches acdacaaa (with
+&#0092;1=a); simply matching the longest match for "&#0092;(ac*&#0092;)" would
+yield &#0092;1=ac, but the overall match would be smaller (acdac).
+Conceptually, the implementation must examine every possible
+match and among those that yield the leftmost longest total
+matches, pick the one that does the longest match for the
+leftmost subexpression, and so on. Note that this means that
+matching by subexpressions is context-dependent: a
+subexpression within a larger RE may match a different string
+from the one it would match as an independent RE, and two
+instances of the same subexpression within the same larger RE
+may match different lengths even in similar sequences of
+characters. For example, in the ERE "(a.*b)(a.*b)" , the two
+identical subexpressions would match four and six characters,
+respectively, of accbaccccb.
+<DT>&#0091;R:2512-2520&#0093;<DD>
+The limit of nine back-references to subexpressions in the RE
+is based on the use of a single-digit identifier; increasing
+this to multiple digits would break historical applications.
+This does not imply that only nine subexpressions are allowed
+in REs. The following is a valid BRE with ten subexpressions:
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+&#0092;(&#0092;(&#0092;(ab&#0092;)*c&#0092;)*d&#0092;)&#0092;(ef&#0092;)*&#0092;(gh&#0092;)&#0092;{2&#0092;}&#0092;(ij&#0092;)*&#0092;(kl&#0092;)*&#0092;(mn&#0092;)*&#0092;(op&#0092;)*&#0092;(qr&#0092;)*
+</PRE>
+</DIV>
+The standard developers regarded the common historical
+behavior, which supported "&#0092;n*" , but not "&#0092;n&#0092;{min,max&#0092;}" ,
+"&#0092;(...&#0092;)*" , or "&#0092;(...&#0092;)&#0092;{min,max&#0092;}" , as a non-intentional
+result of a specific implementation, and they supported both
+duplication and interval expressions following subexpressions
+and back-references.
+<DT>&#0091;R:2537-2544&#0093;<DD>
+However, one relatively uncommon case was changed to allow an
+extension used on some implementations. Historically, the BREs
+"^foo" and "&#0092;(^foo&#0092;)" did not match the same string, despite
+the general rule that subexpressions and entire BREs match the
+same strings. To increase consensus, IEEE Std 1003.1-2001 has
+allowed an extension on some implementations to treat these two
+cases in the same way by declaring that anchoring may occur at
+the beginning or end of a subexpression. Therefore, portable
+BREs that require a literal circumflex at the beginning or a
+dollar sign at the end of a subexpression must escape them.
+Note that a BRE such as "a&#0092;(^bc&#0092;)" will either match "a^bc" or
+nothing on different systems under the rules.
+<DT>&#0091;R:2549-2554&#0093;<DD>
+Some implementations have extended the BRE syntax to add
+alternation. For example, the subexpression "&#0092;(foo$&#0092;|bar&#0092;)"
+would match either "foo" at the end of the string or "bar"
+anywhere. The extension is triggered by the use of the
+undefined "&#0092;|" sequence. Because the BRE is undefined for
+portable scripts, the extending system is free to make other
+assumptions, such as that the '$' represents the end-of-line
+anchor in the middle of a subexpression. If it were not for the
+extension, the '$' would match a literal dollar sign under the
+rules.
+<DT>&#0091;R:2617-2620&#0093;<DD>
+The removal of the Back_open_paren Back_close_paren option from
+the nondupl_RE specification is the result of PASC
+Interpretation 1003.2-92 #43 submitted for the ISO POSIX-2:1993
+standard. Although the grammar required support for null
+subexpressions, this section does not describe the meaning of,
+and historical practice did not support, this construct.
+<DT>&#0091;A:37188&#0093;<DD>
+size_t re_nsub Number of parenthesized subexpressions
+<DT>&#0091;A:37206-37208&#0093;<DD>
+If the REG_NOSUB flag was not set in cflags, then regcomp()
+shall set re_nsub to the number of parenthesized subexpressions
+(delimited by "&#0092;(&#0092;)" in basic regular expressions or "()" in
+extended regular expressions) found in pattern.
+<DT>&#0091;A:37220-37257&#0093;<DD>
+If nmatch is 0 or REG_NOSUB was set in the cflags argument to
+regcomp(), then regexec() shall ignore the pmatch argument.
+Otherwise, the application shall ensure that the pmatch
+argument points to an array with at least nmatch elements, and
+regexec() shall fill in the elements of that array with offsets
+of the substrings of string that correspond to the
+parenthesized subexpressions of pattern: pmatch&#0091;i&#0093;.rm_so
+shall be the byte offset of the beginning and pmatch&#0091;i&#0093;.rm_eo
+shall be one greater than the byte offset of the end of
+substring i. (Subexpression i begins at the ith matched open
+parenthesis, counting from 1.) Offsets in pmatch&#0091;0&#0093; identify
+the substring that corresponds to the entire regular
+expression. Unused elements of pmatch up to pmatch&#0091;nmatch-1&#0093;
+shall be filled with -1. If there are more than nmatch
+subexpressions in pattern (pattern itself counts as a
+subexpression), then regexec() shall still do the match, but
+shall record only the first nmatch substrings.
+<P>
+When matching a basic or extended regular expression, any given
+parenthesized subexpression of pattern might participate in the
+match of several different substrings of string, or it might
+not match any substring even though the pattern as a whole did
+match. The following rules shall be used to determine which
+substrings to report in pmatch when matching regular
+expressions:
+<DIV style="padding-left:16px;text-indent:0px">
+<OL>
+<LI>
+If subexpression i in a regular expression is not contained
+within another subexpression, and it participated in the match
+several times, then the byte offsets in pmatch&#0091;i&#0093; shall
+delimit the last such match.
+<LI>
+If subexpression i is not contained within another
+subexpression, and it did not participate in an otherwise
+successful match, the byte offsets in pmatch&#0091;i&#0093; shall be -1. A
+subexpression does not participate in the match when:
+<PRE>
+&nbsp;'*' or "&#0092;{&#0092;}" appears immediately after the
+subexpression in a basic regular expression, or '*' ,
+&nbsp;'?' , or "{}" appears immediately after the
+subexpression in an extended regular expression, and
+the subexpression did not match (matched 0 times)
+<P>
+or:
+<P>
+&nbsp;'|' is used in an extended regular expression to select
+this subexpression or another, and the other
+subexpression matched.
+</PRE>
+<LI>
+If subexpression i is contained within another subexpression
+j, and i is not contained within any other subexpression that
+is contained within j, and a match of subexpression j is
+reported in pmatch&#0091;j&#0093;, then the match or non-match of
+subexpression i reported in pmatch&#0091;i&#0093; shall be as described in
+1. and 2. above, but within the substring reported in
+pmatch&#0091;j&#0093; rather than the whole string. The offsets in pmatch&#0091;i&#0093; are
+still relative to the start of string.
+<LI>
+If subexpression i is contained in subexpression j, and the
+byte offsets in pmatch&#0091;j&#0093; are -1, then the pointers in
+pmatch&#0091;i&#0093; shall also be -1.
+<LI>
+If subexpression i matched a zero-length string, then both
+byte offsets in pmatch&#0091;i&#0093; shall be the byte offset of the
+character or null terminator immediately following the
+zero-length string.
+</OL>
+</DIV>
+<DT>&#0091;A:37363-37366&#0093;<DD>
+The regexec() function must fill in all nmatch elements of
+pmatch, where nmatch and pmatch are supplied by the
+application, even if some elements of pmatch do not correspond
+to subexpressions in pattern. The application writer should
+note that there is probably no reason for using a value of
+nmatch that is larger than preg-&gt;re_nsub+1.
+<DT>&#0091;A:37407-37413&#0093;<DD>
+The number of subexpressions in the RE is reported in re_nsub
+in preg. With this change to regexec(), consideration was given
+to dropping the REG_NOSUB flag since the user can now specify
+this with a zero nmatch argument to regexec(). However, keeping
+REG_NOSUB allows an implementation to use a different (perhaps
+more efficient) algorithm if it knows in regcomp() that no
+subexpressions need be reported. The implementation is only
+required to fill in pmatch if nmatch is not zero and if
+REG_NOSUB is not specified.
+</DL>
+</DIV>
+<P>
+This sentence is as close as the standard gets to a definition:
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT>&#0091;A:37225-37226&#0093;<DD>
+Subexpression i begins at the ith matched open parenthesis, counting from 1.
+</DL>
+</DIV>
+<P>
+Using nonterminals from the BRE &#0091;D:6371-6731&#0093; and ERE &#0091;D:6452-6452&#0093; grammar
+productions (text not listed in this document) yields the following:
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT><STRONG>DEFINITION</STRONG><DD>
+A
+<EM>subexpression</EM>
+corresponds to the
+<TT>Back_open_paren RE_expression Back_close_paren</TT>
+form of the
+<TT>nondupl_RE</TT>
+BRE grammar production or
+the
+<TT>'(' extended_reg_exp ')'</TT>
+form of the
+<TT>ERE_expression</TT>
+ERE grammar production.
+Subexpression i begins at the ith matched open parenthesis
+(<TT>Back_open_paren</TT>
+for BREs and '(' for EREs),
+starting from the left and counting from 1.
+Subexpression 0 is the entire RE.
+</DL>
+</DIV>
+<P>
+This definition and the subexpression match rule &#0091;R:2359-2370&#0093; can be used
+to examine a class of EREs where the top level catenation operands are
+subexpressions.
+(A top level subexpression is not contained in any other subexpression
+except subexpression 0.)
+The subexpression match rule in pseudo code is:
+<UL type=square>
+<LI>
+determine the longest of the leftmost matches for subexpression-0
+&#0091;R:2359-2361&#0093;
+<LI>
+for 1&lt;=<EM>i</EM>&lt;=<STRONG>re_nsub</STRONG>
+determine the longest match for
+subexpression-<EM>i</EM>
+consistent with the matches already determined for
+subexpression-<EM>j,</EM>
+0&lt;=<EM>j</EM>&lt;<EM>i</EM>.
+&#0091;R:2359-2370&#0093; &#0091;A:37235-37257&#0093;
+</UL>
+For example, given
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#03:E	(a?)((ab)?)		ab	(0,2)(0,0)(0,2)(0,2)
+</PRE>
+</DIV>
+the subexpressions are:
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+subexpression-0	(a?)((ab)?)
+subexpression-1	(a?)
+subexpression-2	((ab)?)
+subexpression-3	(ab)
+</PRE>
+</DIV>
+The longest of the leftmost matches for subexpression-0 is (0,2).
+The longest match for subexpression-1, consistent with the match
+for subexpression-0, is (0,0); otherwise if it had matched (0,1) then
+subexpression-2 would not match and the subexpression-0 match would be
+limited to (0,1).
+The longest match for subexpression-2, consistent with the matches
+for subexpression-0 and subexpression-1, is (0,2).
+The longest match for subexpression-3, consistent with the matches
+for subexpression-0, subexpression-1 and subexpression-2, is (0,2).
+This table illustrates the matching:
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+subexpr	pattern			match
+   0	(a?)((ab)?)		(0,2)
+   1	(a?)			(0,0)
+   2	((ab)?)			(0,2)
+   3	(ab)			(0,2)
+</PRE>
+</DIV>
+RE#04 is a similar example that exposes the associativity of subexpression
+concatenation:
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#04:E	(a?)((ab)?)(b?)		ab	(0,2)(0,1)(1,1)(?,?)(1,2)
+
+subexpr	pattern			match
+   0	(a?)((ab)?)(b?)		(0,2)
+   1	(a?)			(0,1)
+   2	((ab)?)			(1,1)
+   3	(ab)			(?,?)
+   4	(b?)			(1,2)
+</PRE>
+</DIV>
+&#0091;R:2363-2365&#0093; also shows that parentheses can be used to alter the
+order of matching:
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#05:E	((a?)((ab)?))(b?)	ab	(0,2)(0,2)(0,0)(0,2)(0,2)(2,2)
+
+subexpr	pattern			match
+   0	((a?)((ab)?))(b?)	(0,2)
+   1	((a?)((ab)?))		(0,2)
+   2	(a?)			(0,0)
+   3	((ab)?)			(0,2)
+   4	(ab)			(0,2)
+   5	(b?)			(2,2)
+</PRE>
+</DIV>
+In RE#05 the extra parentheses (around subexpression-1 and subexpression-2 in
+RE#04) form a new subexpression-1, and change the
+match for the last subexpression
+<TT>(b?)</TT>
+to (2,2) (from (1,2) in RE#04.)
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#06:E	(a?)(((ab)?)(b?))	ab	(0,2)(0,1)(1,2)(1,1)(?,?)(1,2)
+
+subexpr	pattern			match
+   0	(a?)(((ab)?)(b?))	(0,2)
+   1	(a?)			(0,1)
+   2	(((ab)?)(b?))		(1,2)
+   3	((ab)?)			(1,1)
+   4	(ab)			(?,?)
+   5	(b?)			(1,2)
+</PRE>
+</DIV>
+In RE#06 the extra parenthesis pair forces right associativity and results
+in the same match of (1,2) for the last subexpression
+<TT>(b?)</TT>
+as in RE#04.
+These examples show that:
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT><STRONG>PROPERTY</STRONG><DD>
+Subexpression grouping can alter the precedence of concatenation.
+<DT><STRONG>PROPERTY</STRONG><DD>
+Subexpression concatenation is right associative.
+</DL>
+</DIV>
+<P>
+The following examples examine replicated subexpressions.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#07:E	(.?)			x	(0,1)(0,1)
+:RE#08:E	(.?){1}			x	(0,1)(0,1)
+:RE#09:E	(.?)(.?)		x	(0,1)(0,1)(1,1)
+:RE#10:E	(.?){2}			x	(0,1)(1,1)
+:RE#11:E	(.?)*			x	(0,1)(0,1)
+</PRE>
+</DIV>
+&#0091;D:6227-6234&#0093; specifies that RE#07 and RE#08 are equivalent, and that
+RE#09 and RE#10 are equivalent, and
+&#0091;D:6217-6219&#0093; specifies that RE#09 and RE#11 are equivalent.
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT>&#0091;D:6227-6234&#0093;<DD>
+When an ERE matching a single character or an ERE enclosed in
+parentheses is followed by an interval expression of the format "{m}" ,
+"{m,}" , or "{m,n}" , together with that interval expression it shall
+match what repeated consecutive occurrences of the ERE would match. The
+values of m and n are decimal integers in the range 0 &lt;= m &lt;= n &lt;=
+{RE_DUP_MAX}, where m specifies the exact or minimum number of
+occurrences and n specifies the maximum number of occurrences. The
+expression "{m}" matches exactly m occurrences of the preceding ERE,
+"{m,}" matches at least m occurrences, and "{m,n}" matches any number
+of occurrences between m and n, inclusive.
+<DT>&#0091;D:6217-6219&#0093;<DD>
+When an ERE matching a single character or an ERE enclosed in
+parentheses is followed by the special character asterisk ( '*' ),
+together with that asterisk it shall match what zero or more
+consecutive occurrences of the ERE would match.
+</DL>
+</DIV>
+In RE#09 subexpression-1 matches (0,1), leaving the null string at (1,1) for
+subexpression-2.
+In RE#10 the first iteration of subexpression-1 matches (0,1), the same
+as subexpression-1 in RE#09, and the second iteration of subexpression-1
+matches (1,1), the same as subexpression-2 in RE#09.
+RE#07 and RE#08 show that only one iteration is needed to match the subject
+string, so the match in RE#11 requires only one iteration, and as such is the
+last iteration of &#0091;D:6107-6109&#0093; &#0091;A:37235-37237&#0093;.
+RE#10 and RE#11 also illustrate &#0091;D:6127-6129&#0093; &#0091;D:6239-6241&#0093;, which
+specify that a repeated RE matches the null string only if it is the only
+match (not this case) or if it is necessary to satisfy an interval expression
+minimum (2 in this case.)
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT>&#0091;D:6239-6241&#0093;<DD>
+An ERE matching a single character repeated by an '*' , '?' , or an
+interval expression shall not match a null expression unless this is
+the only match for the repetition or it is necessary to satisfy the
+exact or minimum number of occurrences for the interval expression.
+</DL>
+</DIV>
+<P>
+The following examples dig deeper into replicated subexpressions.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#12:E	(.?.?)			xxx	(0,2)(0,2)
+:RE#13:E	(.?.?){1}		xxx	(0,2)(0,2)
+:RE#14:E	(.?.?)(.?.?)		xxx	(0,3)(0,2)(2,3)
+:RE#15:E	(.?.?){2}		xxx	(0,3)(2,3)
+:RE#16:E	(.?.?)(.?.?)(.?.?)	xxx	(0,3)(0,2)(2,3)(3,3)
+:RE#17:E	(.?.?){3}		xxx	(0,3)(3,3)
+:RE#18:E	(.?.?)*			xxx	(0,3)(2,3)
+</PRE>
+</DIV>
+Here RE#14 shows that only two iterations are needed for a complete match,
+making the last iteration match for RE#18 (2,3), since the first
+iteration matched (0,2), as in RE#14.
+
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="A subpattern is ">A subpattern is </A></H3></FONT></FONT></CENTER>
+The term
+<EM>subpattern</EM>
+appears exactly once:
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT>&#0091;D:5907-5908&#0093;<DD>
+Consistent with the whole match being the longest of the leftmost matches,
+each subpattern, from left to right, shall match the longest possible string.
+</DL>
+</DIV>
+Consider RE#04 and RE#05 again:
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#04:E	(a?)((ab)?)(b?)		ab	(0,2)(0,1)(1,1)(?,?)(1,2)
+:RE#05:E	((a?)((ab)?))(b?)	ab	(0,2)(0,2)(0,0)(0,2)(0,2)(2,2)
+</PRE>
+</DIV>
+If a subpattern were an entity that combined adjacent subexpressions,
+e.g.,
+<TT>(a?)((ab)?)</TT>
+in RE#04, then &#0091;D:5907-5908&#0093; would violate &#0091;R:2359-2370&#0093;.
+Similarly, if a subpattern were an entity that "went inside" subexpressions,
+e.g.,
+<TT>(a?)</TT>
+in RE#05, then again &#0091;D:5907-5908&#0093; would violate &#0091;R:2359-2370&#0093;.
+In other words, a subpattern can be neither larger than nor smaller than
+a subexpression;
+a subpattern must be a grammatical entity equivalent to a subexpression.
+This corresponds to the nonterminal
+<TT>nondupl_RE</TT>
+in the BRE grammar; there is no direct correspondence to a nonterminal
+in the ERE grammar.
+However, if the optional duplication operator (*,+,?,range) is included then
+subpattern corresponds to
+<TT>simple_RE</TT>
+in the BRE grammar and
+<TT>ERE_expression</TT>
+in the ERE grammar, and both &#0091;D:5907-5908&#0093; and &#0091;R:2359-2370&#0093; are satisfied.
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT><STRONG>DEFINITION</STRONG><DD>
+A
+<EM>subpattern</EM>
+corresponds to the
+<TT>simple_RE</TT>
+nonterminal in the BRE grammar or the
+<TT>ERE_expression</TT>
+nonterminal in the ERE grammar.
+</DL>
+</DIV>
+This means that subexpressions and subpatterns are of equal importance
+in RE matching.
+Also note that any other definition for subpattern will put
+&#0091;D:5907-5908&#0093; in direct conflict with &#0091;R:2359-2370&#0093;.
+<P>
+RE#19, RE#20 and RE#21 examine the relationship between subexpression
+and subpattern:
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#19:E	a?((ab)?)(b?)		ab	(0,2)(1,1)(?,?)(1,2)
+:RE#20:E	(a?)((ab)?)b?		ab	(0,2)(0,1)(1,1)(?,?)
+:RE#21:E	a?((ab)?)b?		ab	(0,2)(1,1)(?,?)
+</PRE>
+</DIV>
+<P>
+These are all variations of RE#04.
+Other than subexpression renumbering, the match for the subexpression
+<TT>((ab)?)</TT>
+must be the same in RE#04, RE#19, RE#20 and RE#21.
+<TT>a?</TT>
+is a subpattern in RE#19 and RE#21, of equal matching importance to
+<TT>(a?)</TT>
+in RE#04, and
+<TT>b?</TT>
+is a subpattern in RE#20 and RE#21, of equal matching
+importance to
+<TT>(b?)</TT>
+in RE#04.
+
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="The Dark Corners ">The Dark Corners </A></H3></FONT></FONT></CENTER>
+The remaining examples explore dark corners of the standard
+and implementations.
+Although the differences between some of the examples are subtle,
+for some implementations it may mean the difference between an answer and
+a core dump.
+<P>
+In RE#22 subexpression
+<TT>(a*)</TT>
+matches the null string at (0,0), and continues to match at that position
+until the minimal range count is satisfied.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#22:E	(a*){2}			xxxxx	(0,0)(0,0)
+</PRE>
+</DIV>
+RE#23 through RE#27 expose implementations that sometimes do
+<EM>first match</EM>
+for alternation within subexpressions.
+Some implementations erroneously match the first iteration of
+subexpression-1 in RE#24 through RE#27 to (0,1).
+RE#27 is equivalent to RE#26; the match requires two iterations, the first
+matching (0,2) and the last matching (2,3).
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#23:E	(ab?)(b?a)		aba	(0,3)(0,2)(2,3)
+:RE#24:E	(a|ab)(ba|a)		aba	(0,3)(0,2)(2,3)
+:RE#25:E	(a|ab|ba)		aba	(0,2)(0,2)
+:RE#26:E	(a|ab|ba)(a|ab|ba)	aba	(0,3)(0,2)(2,3)
+:RE#27:E	(a|ab|ba)*		aba	(0,3)(2,3)
+</PRE>
+</DIV>
+RE#28 through RE#33 expose implementations that report short matches
+for some repeated subexpressions.
+Some implementations report incorrect matches for
+subexpression-1 in RE#30 and RE#33.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#28:E	(aba|a*b)		ababa	(0,3)(0,3)
+:RE#29:E	(aba|a*b)(aba|a*b)	ababa	(0,5)(0,2)(2,5)
+:RE#30:E	(aba|a*b)*		ababa	(0,5)(2,5)
+:RE#31:E	(aba|ab|a)		ababa	(0,3)(0,3)
+:RE#32:E	(aba|ab|a)(aba|ab|a)	ababa	(0,5)(0,2)(2,5)
+:RE#33:E	(aba|ab|a)*		ababa	(0,5)(2,5)
+</PRE>
+</DIV>
+RE#34 through RE#36 expose implementations that report subexpression matches
+for earlier iterations of the subexpression.
+Some implementations report a match for subexpression-2 in RE#36
+while reporting the (2,3) match for subexpression-1: clearly a bug.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#34:E	(a(b)?)			aba	(0,2)(0,2)(1,2)
+:RE#35:E	(a(b)?)(a(b)?)		aba	(0,3)(0,2)(1,2)(2,3)(?,?)
+:RE#36:E	(a(b)?)+		aba	(0,3)(2,3)(?,?)
+</PRE>
+</DIV>
+RE#37 and RE#38 expose implementations that give priority to subexpression
+matching over subpattern matching.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#37:E	(.*)(.*)		xx	(0,2)(0,2)(2,2)
+:RE#38:E	.*(.*)			xx	(0,2)(2,2)
+</PRE>
+</DIV>
+RE#39 through RE#41 expose implementations that treat explicit vs. implicit
+subexpression repetition differently.
+This is a theme common to many of the previous examples.
+Again, the subexpression in RE#41 requires two iterations to match,
+and the second iteration matches (5,7), as illustrated by RE#40.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#39:E	(a.*z|b.*y)		azbazby	(0,5)(0,5)
+:RE#40:E	(a.*z|b.*y)(a.*z|b.*y)	azbazby	(0,7)(0,5)(5,7)
+:RE#41:E	(a.*z|b.*y)*		azbazby	(0,7)(5,7)
+</PRE>
+</DIV>
+RE#42 is another
+<EM>first match</EM>
+test.
+Some implementations erroneously report a match of (0,1) for subexpression-1.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#42:E	(.|..)(.*)		ab	(0,2)(0,2)(2,2)
+</PRE>
+</DIV>
+RE#43 through RE#45 require only one iteration of subexpression-1 to
+match the entire subject string.
+RE#45 exposes three separate bugs in the implementations that were tested.
+The most common was
+<EM>over iteration</EM>,
+where subexpression-1 is matched for a second iteration to the null string
+at (3,3).
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#43:E	((..)*(...)*)			xxx		(0,3)(0,3)(?,?)(0,3)
+:RE#44:E	((..)*(...)*)((..)*(...)*)	xxx		(0,3)(0,3)(?,?)(0,3)(3,3)(?,?)(?,?)
+:RE#45:E	((..)*(...)*)*			xxx		(0,3)(0,3)(?,?)(0,3)
+</PRE>
+</DIV>
+RE#46 through RE#82 are nasty;
+backreferences are intuitive neither for the implementor nor the user.
+<P>
+RE#49, RE#53, RE#67 and RE#68 illustrate the second part of the
+<EM>subpattern</EM>
+rule:
+<DIV style="padding-left:16px;text-indent:0px">
+<DL COMPACT>
+<DT>&#0091;D:5908-5909&#0093;<DD>
+For this purpose, a null string shall be considered to be longer than
+no match at all.
+</DL>
+</DIV>
+RE#53 requires close examination to see why the match is (0,2)(1,1)(2,2)
+instead of (0,2)(0,1)(?,?).
+The match of (0,1) for subexpression-1 is longer than (1,1), but
+subexpression-1 can be repeated, and that second iteration allows
+subexpression-2 to match (2,2), which is longer than (?,?) by &#0091;D:5908-5909&#0093;.
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+:RE#46:B	&#0092;(a&#0092;{0,1&#0092;}&#0092;)*b&#0092;1	ab	(0,2)(1,1)
+:RE#47:B	&#0092;(a*&#0092;)*b&#0092;1		ab	(0,2)(1,1)
+:RE#48:B	&#0092;(a*&#0092;)b&#0092;1*		ab	(0,2)(0,1)
+:RE#49:B	&#0092;(a*&#0092;)*b&#0092;1*		ab	(0,2)(1,1)
+:RE#50:B	&#0092;(a&#0092;{0,1&#0092;}&#0092;)*b&#0092;(&#0092;1&#0092;)	ab	(0,2)(1,1)(2,2)
+:RE#51:B	&#0092;(a*&#0092;)*b&#0092;(&#0092;1&#0092;)		ab	(0,2)(1,1)(2,2)
+:RE#52:B	&#0092;(a*&#0092;)b&#0092;(&#0092;1&#0092;)*		ab	(0,2)(0,1)(?,?)
+:RE#53:B	&#0092;(a*&#0092;)*b&#0092;(&#0092;1&#0092;)*		ab	(0,2)(1,1)(2,2)
+:RE#54:B	&#0092;(a&#0092;{0,1&#0092;}&#0092;)*b&#0092;1	aba	(0,3)(0,1)
+:RE#55:B	&#0092;(a*&#0092;)*b&#0092;1		aba	(0,3)(0,1)
+:RE#56:B	&#0092;(a*&#0092;)b&#0092;1*		aba	(0,3)(0,1)
+:RE#57:B	&#0092;(a*&#0092;)*b&#0092;1*		aba	(0,3)(0,1)
+:RE#58:B	&#0092;(a*&#0092;)*b&#0092;(&#0092;1&#0092;)*		aba	(0,3)(0,1)(2,3)
+:RE#59:B	&#0092;(a&#0092;{0,1&#0092;}&#0092;)*b&#0092;1	abaa	(0,3)(0,1)
+:RE#60:B	&#0092;(a*&#0092;)*b&#0092;1		abaa	(0,3)(0,1)
+:RE#61:B	&#0092;(a*&#0092;)b&#0092;1*		abaa	(0,4)(0,1)
+:RE#62:B	&#0092;(a*&#0092;)*b&#0092;1*		abaa	(0,4)(0,1)
+:RE#63:B	&#0092;(a*&#0092;)*b&#0092;(&#0092;1&#0092;)*		abaa	(0,4)(0,1)(3,4)
+:RE#64:B	&#0092;(a&#0092;{0,1&#0092;}&#0092;)*b&#0092;1	aab	(0,3)(2,2)
+:RE#65:B	&#0092;(a*&#0092;)*b&#0092;1		aab	(0,3)(2,2)
+:RE#66:B	&#0092;(a*&#0092;)b&#0092;1*		aab	(0,3)(0,2)
+:RE#67:B	&#0092;(a*&#0092;)*b&#0092;1*		aab	(0,3)(2,2)
+:RE#68:B	&#0092;(a*&#0092;)*b&#0092;(&#0092;1&#0092;)*		aab	(0,3)(2,2)(3,3)
+:RE#69:B	&#0092;(a&#0092;{0,1&#0092;}&#0092;)*b&#0092;1	aaba	(0,4)(1,2)
+:RE#70:B	&#0092;(a*&#0092;)*b&#0092;1		aaba	(0,4)(1,2)
+:RE#71:B	&#0092;(a*&#0092;)b&#0092;1*		aaba	(0,3)(0,2)
+:RE#72:B	&#0092;(a*&#0092;)*b&#0092;1*		aaba	(0,4)(1,2)
+:RE#73:B	&#0092;(a*&#0092;)*b&#0092;(&#0092;1&#0092;)*		aaba	(0,4)(1,2)(3,4)
+:RE#74:B	&#0092;(a&#0092;{0,1&#0092;}&#0092;)*b&#0092;1	aabaa	(0,4)(1,2)
+:RE#75:B	&#0092;(a*&#0092;)*b&#0092;1		aabaa	(0,5)(0,2)
+:RE#76:B	&#0092;(a*&#0092;)b&#0092;1*		aabaa	(0,5)(0,2)
+:RE#77:B	&#0092;(a*&#0092;)*b&#0092;1*		aabaa	(0,5)(0,2)
+:RE#78:B	&#0092;(a*&#0092;)*b&#0092;(&#0092;1&#0092;)*		aabaa	(0,5)(0,2)(3,5)
+:RE#79:B	&#0092;(x&#0092;)*a&#0092;1		a	NOMATCH
+:RE#80:B	&#0092;(x&#0092;)*a&#0092;1*		a	(0,1)(?,?)
+:RE#81:B	&#0092;(x&#0092;)*a&#0092;(&#0092;1&#0092;)		a	NOMATCH
+:RE#82:B	&#0092;(x&#0092;)*a&#0092;(&#0092;1&#0092;)*		a	(0,1)(?,?)(?,?)
+:RE#83:E	(aa(b(b))?)+		aabbaa	(0,6)(4,6)(?,?)(?,?)
+:RE#84:E	(a(b)?)+		aba	(0,3)(2,3)(?,?)
+:RE#85:E	(&#0091;ab&#0093;+)(&#0091;bc&#0093;+)(&#0091;cd&#0093;*)		abcd		(0,4)(0,2)(2,3)(3,4)
+:RE#86:B	&#0092;(&#0091;ab&#0093;*&#0092;)&#0092;(&#0091;bc&#0093;*&#0092;)&#0092;(&#0091;cd&#0093;*&#0092;)&#0092;1	abcdaa		(0,5)(0,1)(1,3)(3,4)
+:RE#87:B	&#0092;(&#0091;ab&#0093;*&#0092;)&#0092;(&#0091;bc&#0093;*&#0092;)&#0092;(&#0091;cd&#0093;*&#0092;)&#0092;1	abcdab		(0,6)(0,2)(2,3)(3,4)
+:RE#88:B	&#0092;(&#0091;ab&#0093;*&#0092;)&#0092;(&#0091;bc&#0093;*&#0092;)&#0092;(&#0091;cd&#0093;*&#0092;)&#0092;1*	abcdaa		(0,6)(0,1)(1,3)(3,4)
+:RE#89:B	&#0092;(&#0091;ab&#0093;*&#0092;)&#0092;(&#0091;bc&#0093;*&#0092;)&#0092;(&#0091;cd&#0093;*&#0092;)&#0092;1*	abcdab		(0,6)(0,2)(2,3)(3,4)
+:RE#90:E	^(A(&#0091;^B&#0093;*))?(B(.*))?		Aa		(0,2)(0,2)(1,2)
+:RE#91:E	^(A(&#0091;^B&#0093;*))?(B(.*))?		Bb		(0,2)(?,?)(?,?)(0,2)(1,2)
+:RE#92:B	.*&#0092;(&#0091;AB&#0093;&#0092;).*&#0092;1			ABA		(0,3)(0,1)
+:RE#93:B$	&#0091;^A&#0093;*A				&#0092;nA		(0,2)
+</PRE>
+</DIV>
+
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="Conclusion">Conclusion</A></H3></FONT></FONT></CENTER>
+It is possible to use the 2001 issue of the POSIX
+<STRONG>regex</STRONG>
+standard,
+<EM>with the addition of one sentence</EM>,
+to resolve the interpretation differences that have surfaced since 1995.
+That key sentence is a precise and consistent definition for the term
+<EM>subpattern</EM>.
+By noting the relationship between
+<EM>subpatterns</EM>
+and
+<EM>subexpressions</EM>,
+the proposed definition is shown to be the only one that can be
+consistent with all parts of the standard.
+<P>
+<HR>
+<TABLE border=0 align=center width=96%>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right><A href="mailto:gsf@research.att.com?subject= ../re/re-interpretation.mm mm document">Glenn Fowler</A></TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Information and Software Systems Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>AT&amp;T Labs Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Florham Park NJ</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>January 2003</TD>
+</TR>
+</TABLE>
+<P>
+
+</TD></TR></TBODY></TABLE>
+
+</BODY>
+</HTML>
diff --git a/re-nullsubexpr.html b/re-nullsubexpr.html
new file mode 100644
index 0000000..f0d7d1f
--- /dev/null
+++ b/re-nullsubexpr.html
@@ -0,0 +1,62 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<HTML>
+<HEAD>
+<META name="generator" content="mm2html (AT&amp;T Labs Research) 2005-10-15">
+<META name="keywords" content="regular expression null subexpression tests">
+<TITLE> ../re/re-nullsubexpr.mm mm document </TITLE>
+<META name="author" content="gsf">
+</HEAD>
+<BODY bgcolor=white link=slateblue vlink=teal >
+<TABLE border=0 align=center width=96%>
+<TBODY><TR><TD valign=top align=left>
+<!--INDEX--><!--/INDEX-->
+<P>
+<HR>
+<CENTER>
+<H3><CENTER><FONT color=red><FONT face=courier>regular expression null subexpression tests</FONT></FONT></CENTER></H3>
+<BR>Glenn Fowler <SMALL>&lt;<A href=mailto:gsf@research.att.com>gsf@research.att.com</A>&gt;</SMALL>
+<P><I>AT&amp;T Labs Research - Florham Park NJ</I>
+</CENTER>
+<P><HR><P>
+The
+<STRONG>regex</STRONG>
+tests in
+	<A href="http://web.archive.org/web/20080709091423id_/http://www.research.att.com/~gsf/testregex/nullsubexpr.dat">nullsubexpr.dat</A>
+exercise
+<STRONG>regex</STRONG>
+null subexpression matching.
+<P>
+<HR>
+<TABLE border=0 align=center width=96%>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right><A href="mailto:gsf@research.att.com?subject= ../re/re-nullsubexpr.mm mm document">Glenn Fowler</A></TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Information and Software Systems Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>AT&amp;T Labs Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Florham Park NJ</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>August 04, 2002</TD>
+</TR>
+</TABLE>
+<P>
+
+</TD></TR></TBODY></TABLE>
+
+</BODY>
+</HTML>
diff --git a/re-repetition.html b/re-repetition.html
new file mode 100644
index 0000000..2381811
--- /dev/null
+++ b/re-repetition.html
@@ -0,0 +1,60 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<HTML>
+<HEAD>
+<META name="generator" content="mm2html (AT&amp;T Labs Research) 2005-10-15">
+<META name="keywords" content="regular expression repetition tests">
+<TITLE> ../re/re-repetition.mm mm document </TITLE>
+<META name="author" content="gsf">
+</HEAD>
+<BODY bgcolor=white link=slateblue vlink=teal >
+<TABLE border=0 align=center width=96%>
+<TBODY><TR><TD valign=top align=left>
+<!--INDEX--><!--/INDEX-->
+<P>
+<HR>
+<CENTER>
+<H3><CENTER><FONT color=red><FONT face=courier>regular expression repetition tests</FONT></FONT></CENTER></H3>
+<BR>Glenn Fowler <SMALL>&lt;<A href=mailto:gsf@research.att.com>gsf@research.att.com</A>&gt;</SMALL>
+<P><I>AT&amp;T Labs Research - Florham Park NJ</I>
+</CENTER>
+<P><HR><P>
+The
+<STRONG>regex</STRONG>
+tests in
+	<A href="http://web.archive.org/web/20080726033833id_/http://www.research.att.com/~gsf/testregex/repetition.dat">repetition.dat</A>
+exercise explicit and implicit repetition.
+<P>
+<HR>
+<TABLE border=0 align=center width=96%>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right><A href="mailto:gsf@research.att.com?subject= ../re/re-repetition.mm mm document">Glenn Fowler</A></TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Information and Software Systems Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>AT&amp;T Labs Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Florham Park NJ</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>August 04, 2002</TD>
+</TR>
+</TABLE>
+<P>
+
+</TD></TR></TBODY></TABLE>
+
+</BODY>
+</HTML>
diff --git a/repetition.dat b/repetition.dat
new file mode 100644
index 0000000..b54a2c6
--- /dev/null
+++ b/repetition.dat
@@ -0,0 +1,79 @@
+NOTE	implicit vs. explicit repetitions : 2002-08-01
+#
+# Glenn Fowler <gsf@research.att.com>
+# conforming matches (column 4) must match one of the following BREs
+#	NOMATCH
+#	(0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
+#	(0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
+# i.e., each 3-tuple has two identical elements and one (?,?)
+#
+
+E	((..)|(.))				NULL		NOMATCH
+E	((..)|(.))((..)|(.))			NULL		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		NULL		NOMATCH
+
+E	((..)|(.)){1}				NULL		NOMATCH
+E	((..)|(.)){2}				NULL		NOMATCH
+E	((..)|(.)){3}				NULL		NOMATCH
+
+E	((..)|(.))*				NULL		(0,0)
+
+E	((..)|(.))				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.))((..)|(.))			a		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		a		NOMATCH
+
+E	((..)|(.)){1}				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.)){2}				a		NOMATCH
+E	((..)|(.)){3}				a		NOMATCH
+
+E	((..)|(.))*				a		(0,1)(0,1)(?,?)(0,1)
+
+E	((..)|(.))				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aa		(0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
+E	((..)|(.))((..)|(.))((..)|(.))		aa		NOMATCH
+
+E	((..)|(.)){1}				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aa		(0,2)(1,2)(?,?)(1,2)
+E	((..)|(.)){3}				aa		NOMATCH
+
+E	((..)|(.))*				aa		(0,2)(0,2)(0,2)(?,?)
+
+E	((..)|(.))				aaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaa		(0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
+E	((..)|(.))((..)|(.))((..)|(.))		aaa		(0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
+
+E	((..)|(.)){1}				aaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.)){3}				aaa		(0,3)(2,3)(?,?)(2,3)
+
+E	((..)|(.))*				aaa		(0,3)(2,3)(?,?)(2,3)
+
+E	((..)|(.))				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaa		(0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
+
+E	((..)|(.)){1}				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaa		(0,4)(2,4)(2,4)(?,?)
+E	((..)|(.)){3}				aaaa		(0,4)(3,4)(?,?)(3,4)
+
+E	((..)|(.))*				aaaa		(0,4)(2,4)(2,4)(?,?)
+
+E	((..)|(.))				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaa		(0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
+
+E	((..)|(.)){1}				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaa		(0,4)(2,4)(2,4)(?,?)
+E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(?,?)(4,5)
+
+E	((..)|(.))*				aaaaa		(0,5)(4,5)(?,?)(4,5)
+
+E	((..)|(.))				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaaa		(0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
+
+E	((..)|(.)){1}				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaaa		(0,4)(2,4)(2,4)(?,?)
+E	((..)|(.)){3}				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+E	((..)|(.))*				aaaaaa		(0,6)(4,6)(4,6)(?,?)
diff --git a/rightassoc.dat b/rightassoc.dat
new file mode 100644
index 0000000..ed7f28e
--- /dev/null
+++ b/rightassoc.dat
@@ -0,0 +1,16 @@
+NOTE	left-assoc:pass-none right-assoc:pass-all : 2002-04-29
+
+E	(a|ab)(c|bcd)(d*)	abcd	(0,4)(0,2)(2,3)(3,4)
+E	(a|ab)(bcd|c)(d*)	abcd	(0,4)(0,2)(2,3)(3,4)
+E	(ab|a)(c|bcd)(d*)	abcd	(0,4)(0,2)(2,3)(3,4)
+E	(ab|a)(bcd|c)(d*)	abcd	(0,4)(0,2)(2,3)(3,4)
+
+E	(a*)(b|abc)(c*)		abc	(0,3)(0,1)(1,2)(2,3)
+E	(a*)(abc|b)(c*)		abc	(0,3)(0,1)(1,2)(2,3)
+E	(a*)(b|abc)(c*)		abc	(0,3)(0,1)(1,2)(2,3)
+E	(a*)(abc|b)(c*)		abc	(0,3)(0,1)(1,2)(2,3)
+
+E	(a|ab)(c|bcd)(d|.*)	abcd	(0,4)(0,2)(2,3)(3,4)
+E	(a|ab)(bcd|c)(d|.*)	abcd	(0,4)(0,2)(2,3)(3,4)
+E	(ab|a)(c|bcd)(d|.*)	abcd	(0,4)(0,2)(2,3)(3,4)
+E	(ab|a)(bcd|c)(d|.*)	abcd	(0,4)(0,2)(2,3)(3,4)
diff --git a/testregex.c b/testregex.c
new file mode 100644
index 0000000..7b86ab7
--- /dev/null
+++ b/testregex.c
@@ -0,0 +1,2121 @@
+#pragma prototyped noticed
+
+/*
+ * regex(3) test harness
+ *
+ * build:	cc -o testregex testregex.c
+ * help:	testregex --man
+ * note:	REG_* features are detected by #ifdef; if REG_* are enums
+ *		then supply #define REG_foo REG_foo for each enum REG_foo
+ *
+ *	Glenn Fowler <gsf@research.att.com>
+ *	AT&T Labs Research
+ *
+ * PLEASE: publish your tests so everyone can benefit
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
+ * without restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, and/or sell copies of the
+ * Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following disclaimer:
+ *
+ * THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+static const char id[] = "\n@(#)$Id: testregex (AT&T Research) 2005-05-20 $\0\n";
+
+#if _PACKAGE_ast
+#include <ast.h>
+#else
+#include <sys/types.h>
+#endif
+
+#include <stdio.h>
+#include <regex.h>
+#include <ctype.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef	__STDC__
+#include <stdlib.h>
+#include <locale.h>
+#endif
+
+#if !_PACKAGE_ast
+#undef	REG_DISCIPLINE
+#endif
+
+#ifndef REG_DELIMITED
+#undef	_REG_subcomp
+#endif
+
+#define TEST_ARE		0x00000001
+#define TEST_BRE		0x00000002
+#define TEST_ERE		0x00000004
+#define TEST_KRE		0x00000008
+#define TEST_LRE		0x00000010
+#define TEST_SRE		0x00000020
+
+#define TEST_EXPAND		0x00000040
+#define TEST_LENIENT		0x00000080
+
+#define TEST_QUERY		0x00000100
+#define TEST_SUB		0x00000200
+#define TEST_UNSPECIFIED	0x00000400
+#define TEST_VERIFY		0x00000800
+#define TEST_AND		0x00001000
+#define TEST_OR			0x00002000
+
+#define TEST_DELIMIT		0x00010000
+#define TEST_OK			0x00020000
+#define TEST_SAME		0x00040000
+
+#define TEST_ACTUAL		0x00100000
+#define TEST_BASELINE		0x00200000
+#define TEST_FAIL		0x00400000
+#define TEST_PASS		0x00800000
+#define TEST_SUMMARY		0x01000000
+
+#define TEST_IGNORE_ERROR	0x02000000
+#define TEST_IGNORE_OVER	0x04000000
+#define TEST_IGNORE_POSITION	0x08000000
+
+#define TEST_CATCH		0x10000000
+#define TEST_VERBOSE		0x20000000
+
+#define TEST_GLOBAL		(TEST_ACTUAL|TEST_AND|TEST_BASELINE|TEST_CATCH|TEST_FAIL|TEST_IGNORE_ERROR|TEST_IGNORE_OVER|TEST_IGNORE_POSITION|TEST_OR|TEST_PASS|TEST_SUMMARY|TEST_VERBOSE)
+
+#ifdef REG_DISCIPLINE
+
+
+#include <stk.h>
+
+typedef struct Disc_s
+{
+	regdisc_t	disc;
+	int		ordinal;
+	Sfio_t*		sp;
+} Disc_t;
+
+static void*	/* re_compf callout (installed in main): bump the callout ordinal and return it as the callout data */
+compf(const regex_t* re, const char* xstr, size_t xlen, regdisc_t* disc)
+{
+	Disc_t*		callout = (Disc_t*)disc;	/* disc is the first member of Disc_t */
+	callout->ordinal += 1;
+	return (void*)callout->ordinal;
+}
+
+static int	/* re_execf callout: append "{x}(ordinal:remaining)" to the discipline stream and return atoi(x) */
+execf(const regex_t* re, void* data, const char* xstr, size_t xlen, const char* sstr, size_t slen, char** snxt, regdisc_t* disc)
+{
+	Disc_t*		dp = (Disc_t*)disc;
+
+	sfprintf(dp->sp, "{%-.*s}(%d:%d)", (int)xlen, xstr, (int)(long)data, (int)slen);	/* .* and %d take int, not size_t or a pointer */
+	return atoi(xstr);
+}
+
+static void*	/* re_resizef hook (installed by -s): allocate size bytes via stkalloc(); a zero size yields 0 */
+resizef(void* handle, void* data, size_t size)
+{
+	Sfio_t*		stack = (Sfio_t*)handle;
+
+	return size ? (void*)stkalloc(stack, size) : (void*)0;
+}
+
+#endif
+
+#ifndef NiL
+#ifdef	__STDC__
+#define NiL		0
+#else
+#define NiL		(char*)0
+#endif
+#endif
+
+#define H(x)		do{if(html)fprintf(stderr,x);}while(0)
+#define T(x)		fprintf(stderr,x)
+
+static void	/* write the manual to stderr; T() lines always, H() lines (the HTML wrapper) only when html != 0 */
+help(int html)
+{
+H("<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n");
+H("<HTML>\n");
+H("<HEAD>\n");
+H("<TITLE>testregex man document</TITLE>\n");
+H("</HEAD>\n");
+H("<BODY bgcolor=white>\n");
+H("<PRE>\n");
+T("NAME\n");
+T("  testregex - regex(3) test harness\n");
+T("\n");
+T("SYNOPSIS\n");
+T("  testregex [ options ]\n");
+T("\n");
+T("DESCRIPTION\n");
+T("  testregex reads regex(3) test specifications, one per line, from the\n");
+T("  standard input and writes one output line for each failed test. A\n");
+T("  summary line is written after all tests are done. Each successful\n");
+T("  test is run again with REG_NOSUB. Unsupported features are noted\n");
+T("  before the first test, and tests requiring these features are\n");
+T("  silently ignored.\n");
+T("\n");
+T("OPTIONS\n");
+T("  -c	catch signals and non-terminating calls\n");
+T("  -e	ignore error return mismatches\n");
+T("  -h	list help on standard error\n");
+T("  -n	do not repeat successful tests with regnexec()\n");
+T("  -o	ignore match[] overrun errors\n");
+T("  -p	ignore negative position mismatches\n");
+T("  -s	use stack instead of malloc\n");
+T("  -x	do not repeat successful tests with REG_NOSUB\n");
+T("  -v	list each test line\n");
+T("  -A	list failed test lines with actual answers\n");
+T("  -B	list all test lines with actual answers\n");
+T("  -F	list failed test lines\n");
+T("  -P	list passed test lines\n");
+T("  -S	output one summary line\n");
+T("\n");
+T("INPUT FORMAT\n");
+T("  Input lines may be blank, a comment beginning with #, or a test\n");
+T("  specification. A specification is five fields separated by one\n");
+T("  or more tabs. NULL denotes the empty string and NIL denotes the\n");
+T("  0 pointer.\n");
+T("\n");
+T("  Field 1: the regex(3) flags to apply, one character per REG_feature\n");
+T("  flag. The test is skipped if REG_feature is not supported by the\n");
+T("  implementation. If the first character is not [BEASKL] then the\n");
+T("  specification is a global control line. One or more of [BEASKL] may be\n");
+T("  specified; the test will be repeated for each mode.\n");
+T("\n");
+T("    B 	basic			BRE	(grep, ed, sed)\n");
+T("    E 	REG_EXTENDED		ERE	(egrep)\n");
+T("    A	REG_AUGMENTED		ARE	(egrep with negation)\n");
+T("    S	REG_SHELL		SRE	(sh glob)\n");
+T("    K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)\n");
+T("    L	REG_LITERAL		LRE	(fgrep)\n");
+T("\n");
+T("    a	REG_LEFT|REG_RIGHT	implicit ^...$\n");
+T("    b	REG_NOTBOL		lhs does not match ^\n");
+T("    c	REG_COMMENT		ignore space and #...\\n\n");
+T("    d	REG_SHELL_DOT		explicit leading . match\n");
+T("    e	REG_NOTEOL		rhs does not match $\n");
+T("    f	REG_MULTIPLE		multiple \\n separated patterns\n");
+T("    g	FNM_LEADING_DIR		testfnmatch only -- match until /\n");
+T("    h	REG_MULTIREF		multiple digit backref\n");
+T("    i	REG_ICASE		ignore case\n");
+T("    j	REG_SPAN		. matches \\n\n");
+T("    k	REG_ESCAPE		\\ to escape [...] delimiter\n");
+T("    l	REG_LEFT		implicit ^...\n");
+T("    m	REG_MINIMAL		minimal match\n");
+T("    n	REG_NEWLINE		explicit \\n match\n");
+T("    o	REG_ENCLOSED		(|&) magic inside [@|&](...)\n");
+T("    p	REG_SHELL_PATH		explicit / match\n");
+T("    q	REG_DELIMITED		delimited pattern\n");
+T("    r	REG_RIGHT		implicit ...$\n");
+T("    s	REG_SHELL_ESCAPED	\\ not special\n");
+T("    t	REG_MUSTDELIM		all delimiters must be specified\n");
+T("    u	standard unspecified behavior -- errors not counted\n");
+T("    w	REG_NOSUB		no subexpression match array\n");
+T("    x	REG_LENIENT		let some errors slide\n");
+T("    y	REG_LEFT		regexec() implicit ^...\n");
+T("    z	REG_NULL		NULL subexpressions ok\n");
+T("    $	                        expand C \\c escapes in fields 2 and 3\n");
+T("    /	                        field 2 is a regsubcomp() expression\n");
+T("\n");
+T("  Field 1 control lines:\n");
+T("\n");
+T("    C		set LC_COLLATE and LC_CTYPE to locale in field 2\n");
+T("\n");
+T("    ?test ...	output field 5 if passed and != EXPECTED, silent otherwise\n");
+T("    &test ...	output field 5 if current and previous passed\n");
+T("    |test ...	output field 5 if current passed and previous failed\n");
+T("    ; ...	output field 2 if previous failed\n");
+T("    {test ...	skip if failed until }\n");
+T("    }		end of skip\n");
+T("\n");
+T("    : comment		comment copied as output NOTE\n");
+T("    :comment:test	:comment: ignored\n");
+T("    N[OTE] comment	comment copied as output NOTE\n");
+T("    T[EST] comment	comment\n");
+T("\n");
+T("    number		use number for nmatch (20 by default)\n");
+T("\n");
+T("  Field 2: the regular expression pattern; SAME uses the pattern from\n");
+T("    the previous specification.\n");
+T("\n");
+T("  Field 3: the string to match.\n");
+T("\n");
+T("  Field 4: the test outcome. This is either one of the posix error\n");
+T("    codes (with REG_ omitted) or the match array, a list of (m,n)\n");
+T("    entries with m and n being first and last+1 positions in the\n");
+T("    field 3 string, or NULL if REG_NOSUB is in effect and success\n");
+T("    is expected. BADPAT is acceptable in place of any regcomp(3)\n");
+T("    error code. The match[] array is initialized to (-2,-2) before\n");
+T("    each test. All array elements from 0 to nmatch-1 must be specified\n");
+T("    in the outcome. Unspecified endpoints (offset -1) are denoted by ?.\n");
+T("    Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a\n");
+T("    matched (?{...}) expression, where x is the text enclosed by {...},\n");
+T("    o is the expression ordinal counting from 1, and n is the length of\n");
+T("    the unmatched portion of the subject string. If x starts with a\n");
+T("    number then that is the return value of re_execf(), otherwise 0 is\n");
+T("    returned.\n");
+T("\n");
+T("  Field 5: optional comment appended to the report.\n");
+T("\n");
+T("CAVEAT\n");
+T("    If a regex implementation misbehaves with memory then all bets are off.\n");
+T("\n");
+T("CONTRIBUTORS\n");
+T("  Glenn Fowler    gsf@research.att.com        (ksh strmatch, regex extensions)\n");
+T("  David Korn      dgk@research.att.com        (ksh glob matcher)\n");
+T("  Doug McIlroy    mcilroy@dartmouth.edu       (ast regex/testre in C++)\n");
+T("  Tom Lord        lord@regexps.com            (rx tests)\n");
+T("  Henry Spencer   henry@zoo.toronto.edu       (original public regex)\n");
+T("  Andrew Hume     andrew@research.att.com     (gre tests)\n");
+T("  John Maddock    John_Maddock@compuserve.com (regex++ tests)\n");
+T("  Philip Hazel    ph10@cam.ac.uk              (pcre tests)\n");
+T("  Ville Laurikari vl@iki.fi                   (libtre tests)\n");
+H("</PRE>\n");
+H("</BODY>\n");
+H("</HTML>\n");
+}
+
+#ifndef elementsof
+#define elementsof(x)	(sizeof(x)/sizeof(x[0]))
+#endif
+
+#ifndef streq
+#define streq(a,b)	(*(a)==*(b)&&!strcmp(a,b))
+#endif
+
+#define HUNG		2
+#define NOTEST		(~0)
+
+#ifndef REG_TEST_DEFAULT
+#define REG_TEST_DEFAULT	0
+#endif
+
+#ifndef REG_EXEC_DEFAULT
+#define REG_EXEC_DEFAULT	0
+#endif
+
+static const char* unsupported[] =	/* names of features this build cannot test; main() prints them as a NOTE */
+{
+	"BASIC",	/* always present; main() picks the starting index, so this entry may be skipped */
+#ifndef REG_EXTENDED
+	"EXTENDED",
+#endif
+#ifndef REG_AUGMENTED
+	"AUGMENTED",
+#endif
+#ifndef REG_SHELL
+	"SHELL",
+#endif
+
+#ifndef REG_COMMENT
+	"COMMENT",
+#endif
+#ifndef REG_DELIMITED
+	"DELIMITED",
+#endif
+#ifndef REG_DISCIPLINE
+	"DISCIPLINE",
+#endif
+#ifndef REG_ESCAPE
+	"ESCAPE",
+#endif
+#ifndef REG_ICASE
+	"ICASE",
+#endif
+#ifndef REG_LEFT
+	"LEFT",
+#endif
+#ifndef REG_LENIENT
+	"LENIENT",
+#endif
+#ifndef REG_LITERAL
+	"LITERAL",
+#endif
+#ifndef REG_MINIMAL
+	"MINIMAL",
+#endif
+#ifndef REG_MULTIPLE
+	"MULTIPLE",
+#endif
+#ifndef REG_MULTIREF
+	"MULTIREF",
+#endif
+#ifndef REG_MUSTDELIM
+	"MUSTDELIM",
+#endif
+#ifndef REG_NEWLINE
+	"NEWLINE",
+#endif
+#ifndef REG_NOTBOL
+	"NOTBOL",
+#endif
+#ifndef REG_NOTEOL
+	"NOTEOL",
+#endif
+#ifndef REG_NULL
+	"NULL",
+#endif
+#ifndef REG_RIGHT
+	"RIGHT",
+#endif
+#ifndef REG_SHELL_DOT
+	"SHELL_DOT",
+#endif
+#ifndef REG_SHELL_ESCAPED
+	"SHELL_ESCAPED",
+#endif
+#ifndef REG_SHELL_GROUP
+	"SHELL_GROUP",
+#endif
+#ifndef REG_SHELL_PATH
+	"SHELL_PATH",
+#endif
+#ifndef REG_SPAN
+	"SPAN",
+#endif
+#if REG_NOSUB & REG_TEST_DEFAULT
+	"SUBMATCH",	/* listed when REG_NOSUB is part of the default compile flags */
+#endif
+#if !_REG_nexec
+	"regnexec",
+#endif
+#if !_REG_subcomp
+	"regsubcomp",
+#endif
+	0	/* terminator; main() iterates over elementsof(unsupported) - 1 entries */
+};
+
+#ifndef REG_COMMENT
+#define REG_COMMENT	NOTEST
+#endif
+#ifndef REG_DELIMITED
+#define REG_DELIMITED	NOTEST
+#endif
+#ifndef REG_ESCAPE
+#define REG_ESCAPE	NOTEST
+#endif
+#ifndef REG_ICASE
+#define REG_ICASE	NOTEST
+#endif
+#ifndef REG_LEFT
+#define REG_LEFT	NOTEST
+#endif
+#ifndef REG_LENIENT
+#define REG_LENIENT	0
+#endif
+#ifndef REG_MINIMAL
+#define REG_MINIMAL	NOTEST
+#endif
+#ifndef REG_MULTIPLE
+#define REG_MULTIPLE	NOTEST
+#endif
+#ifndef REG_MULTIREF
+#define REG_MULTIREF	NOTEST
+#endif
+#ifndef REG_MUSTDELIM
+#define REG_MUSTDELIM	NOTEST
+#endif
+#ifndef REG_NEWLINE
+#define REG_NEWLINE	NOTEST
+#endif
+#ifndef REG_NOTBOL
+#define REG_NOTBOL	NOTEST
+#endif
+#ifndef REG_NOTEOL
+#define REG_NOTEOL	NOTEST
+#endif
+#ifndef REG_NULL
+#define REG_NULL	NOTEST
+#endif
+#ifndef REG_RIGHT
+#define REG_RIGHT	NOTEST
+#endif
+#ifndef REG_SHELL_DOT
+#define REG_SHELL_DOT	NOTEST
+#endif
+#ifndef REG_SHELL_ESCAPED
+#define REG_SHELL_ESCAPED	NOTEST
+#endif
+#ifndef REG_SHELL_GROUP
+#define REG_SHELL_GROUP	NOTEST
+#endif
+#ifndef REG_SHELL_PATH
+#define REG_SHELL_PATH	NOTEST
+#endif
+#ifndef REG_SPAN
+#define REG_SPAN	NOTEST
+#endif
+
+#define REG_UNKNOWN	(-1)
+
+#ifndef REG_ENEWLINE
+#define REG_ENEWLINE	(REG_UNKNOWN-1)
+#endif
+#ifndef REG_ENULL
+#ifndef REG_EMPTY
+#define REG_ENULL	(REG_UNKNOWN-2)
+#else
+#define REG_ENULL	REG_EMPTY
+#endif
+#endif
+#ifndef REG_ECOUNT
+#define REG_ECOUNT	(REG_UNKNOWN-3)
+#endif
+#ifndef REG_BADESC
+#define REG_BADESC	(REG_UNKNOWN-4)
+#endif
+#ifndef REG_EMEM
+#define REG_EMEM	(REG_UNKNOWN-5)
+#endif
+#ifndef REG_EHUNG
+#define REG_EHUNG	(REG_UNKNOWN-6)
+#endif
+#ifndef REG_EBUS
+#define REG_EBUS	(REG_UNKNOWN-7)
+#endif
+#ifndef REG_EFAULT
+#define REG_EFAULT	(REG_UNKNOWN-8)
+#endif
+#ifndef REG_EFLAGS
+#define REG_EFLAGS	(REG_UNKNOWN-9)
+#endif
+#ifndef REG_EDELIM
+#define REG_EDELIM	(REG_UNKNOWN-9)
+#endif
+
+static const struct { int code; char* name; } codes[] =
+{
+	REG_UNKNOWN,	"UNKNOWN",
+	REG_NOMATCH,	"NOMATCH",
+	REG_BADPAT,	"BADPAT",
+	REG_ECOLLATE,	"ECOLLATE",
+	REG_ECTYPE,	"ECTYPE",
+	REG_EESCAPE,	"EESCAPE",
+	REG_ESUBREG,	"ESUBREG",
+	REG_EBRACK,	"EBRACK",
+	REG_EPAREN,	"EPAREN",
+	REG_EBRACE,	"EBRACE",
+	REG_BADBR,	"BADBR",
+	REG_ERANGE,	"ERANGE",
+	REG_ESPACE,	"ESPACE",
+	REG_BADRPT,	"BADRPT",
+	REG_ENEWLINE,	"ENEWLINE",
+	REG_ENULL,	"ENULL",
+	REG_ECOUNT,	"ECOUNT",
+	REG_BADESC,	"BADESC",
+	REG_EMEM,	"EMEM",
+	REG_EHUNG,	"EHUNG",
+	REG_EBUS,	"EBUS",
+	REG_EFAULT,	"EFAULT",
+	REG_EFLAGS,	"EFLAGS",
+	REG_EDELIM,	"EDELIM",
+};
+
+static struct	/* harness-wide state: counters, current input and the signal recovery context */
+{
+	regmatch_t	NOMATCH;	/* sentinel entry; main() sets both offsets to -2 */
+	int		errors;		/* failures counted by report() and extract() */
+	int		extracted;	/* set to 1 by extract(); main() clears it for each test line */
+	int		ignored;	/* mismatches let through under TEST_IGNORE_POSITION */
+	int		lineno;		/* physical input lines read by getline() */
+	int		passed;		/* tests counted as passed by extract() */
+	int		signals;	/* signals caught by gotcha() */
+	int		unspecified;	/* TEST_UNSPECIFIED failures, kept apart from errors */
+	int		verify;		/* not referenced in the visible part of the file -- TODO confirm use */
+	int		warnings;	/* reset per input file in main() */
+	char*		file;		/* current input file name, 0 for standard input */
+	char*		stack;		/* set by the -s option (REG_DISCIPLINE builds only) */
+	char*		which;		/* when set, printed by report() after the quoted strings */
+	jmp_buf		gotcha;		/* setjmp() context that gotcha() longjmp()s to */
+#ifdef REG_DISCIPLINE
+	Disc_t		disc;		/* callout discipline; main() points preg.re_disc at it */
+#endif
+} state;
+
+static void	/* print s on stdout in the notation used by the test files */
+quote(char* s, int len, unsigned long test)
+{
+	/*
+	 * a 0 pointer prints as NIL and a string whose first byte is NUL
+	 * (with len <= 1) prints as NULL; any other string is escaped only
+	 * when TEST_EXPAND is set
+	 */
+	unsigned char*	cur = (unsigned char*)s;
+	unsigned char*	end;
+	const char*	esc;
+	int		delimit = (test & TEST_DELIMIT) != 0;
+	int		ch;
+
+	if (!cur)
+	{
+		printf("NIL");
+		return;
+	}
+	if (!*cur && len <= 1)
+	{
+		printf("NULL");
+		return;
+	}
+	/* without TEST_EXPAND the string is written verbatim and len is not used */
+	if (!(test & TEST_EXPAND))
+	{
+		printf("%s", s);
+		return;
+	}
+	/* a negative len means s is NUL terminated */
+	if (len < 0)
+		len = strlen((char*)cur);
+	end = cur + len;
+	/* TEST_DELIMIT wraps the output in double quotes and escapes embedded ones */
+	if (delimit)
+		printf("\"");
+	for (; cur < end; cur++)
+	{
+		ch = *cur;
+		/* esc is the C escape spelling of ch, or 0 when it has none */
+		switch (ch)
+		{
+		case '\\':	esc = "\\\\"; break;
+		case '"':	esc = delimit ? "\\\"" : "\""; break;
+		case '\a':	esc = "\\a"; break;
+		case '\b':	esc = "\\b"; break;
+		case 033:	esc = "\\e"; break;
+		case '\f':	esc = "\\f"; break;
+		case '\n':	esc = "\\n"; break;
+		case '\r':	esc = "\\r"; break;
+		case '\t':	esc = "\\t"; break;
+		case '\v':	esc = "\\v"; break;
+		default:	esc = 0; break;
+		}
+		/* no named escape: printable bytes pass through, the rest print as \xHH */
+		if (esc)
+			printf("%s", esc);
+		else if (!iscntrl(ch) && isprint(ch))
+			putchar(ch);
+		else
+			printf("\\x%02x", ch);
+	}
+	/* closing quote to match the opening one */
+	if (delimit)
+		printf("\"");
+}
+
+static void
+report(char* comment, char* fun, char* re, char* s, int len, char* msg, int flags, unsigned long test)
+{
+	if (state.file)
+		printf("%s:", state.file);
+	printf("%d:", state.lineno);
+	if (re)
+	{
+		printf(" ");
+		quote(re, -1, test|TEST_DELIMIT);
+		if (s)
+		{
+			printf(" versus ");
+			quote(s, len, test|TEST_DELIMIT);
+		}
+	}
+	if (test & TEST_UNSPECIFIED)
+	{
+		state.unspecified++;
+		printf(" unspecified behavior");
+	}
+	else
+		state.errors++;
+	if (state.which)
+		printf(" %s", state.which);
+	if (flags & REG_NOSUB)
+		printf(" NOSUB");
+	if (fun)
+		printf(" %s", fun);
+	if (comment[strlen(comment)-1] == '\n')
+		printf(" %s", comment);
+	else
+	{
+		printf(" %s: ", comment);
+		if (msg)
+			printf("%s: ", msg);
+	}
+}
+
+static void
+error(regex_t* preg, int code)
+{
+	char*	msg;
+	char	buf[256];
+
+	switch (code)
+	{
+	case REG_EBUS:
+		msg = "bus error";
+		break;
+	case REG_EFAULT:
+		msg = "memory fault";
+		break;
+	case REG_EHUNG:
+		msg = "did not terminate";
+		break;
+	default:
+		regerror(code, preg, msg = buf, sizeof buf);
+		break;
+	}
+	printf("%s\n", msg);
+}
+
+static void	/* complain about a malformed test specification via report() and exit with status 1 */
+bad(char* comment, char* re, char* s, int len, unsigned long test)
+{
+	fputs("bad test case ", stdout);
+	report(comment, NiL, re, s, len, NiL, 0, test);	/* no function name, no message, no flags */
+	exit(1);
+}
+
+static int	/* expand C style \ escapes in s in place; returns the length of the result */
+escape(char* s)
+{
+	char*	b;	/* start of the string, for the length computation */
+	char*	t;	/* write position; never ahead of the read position s */
+	char*	q;	/* read limit for fixed width escapes */
+	char*	e;	/* where a { or [ may open a delimited hex escape; 0 once inside one */
+	int	c;
+
+	for (b = t = s; *t = *s; s++, t++)
+		if (*s == '\\')
+			switch (*++s)
+			{
+			case '\\':
+				break;
+			case 'a':
+				*t = '\a';
+				break;
+			case 'b':
+				*t = '\b';
+				break;
+			case 'c':	/* \cX: control character; a trailing \c stores NUL */
+				if (*t = *++s)
+					*t &= 037;
+				else
+					s--;
+				break;
+			case 'e':
+			case 'E':
+				*t = 033;
+				break;
+			case 'f':
+				*t = '\f';
+				break;
+			case 'n':
+				*t = '\n';
+				break;
+			case 'r':
+				*t = '\r';
+				break;
+			case 's':
+				*t = ' ';
+				break;
+			case 't':
+				*t = '\t';
+				break;
+			case 'v':
+				*t = '\v';
+				break;
+			case 'u':
+			case 'x':
+				q = *s == 'u' ? (s + 4) : (char*)0;	/* \u reads at most 4 hex digits, \x has no limit; was tested after c was zeroed */
+				c = 0;
+				e = s + 1;
+				while (!e || !q || s < q)
+				{
+					switch (*++s)
+					{
+					case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+						c = (c << 4) + *s - 'a' + 10;
+						continue;
+					case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+						c = (c << 4) + *s - 'A' + 10;
+						continue;
+					case '0': case '1': case '2': case '3': case '4':
+					case '5': case '6': case '7': case '8': case '9':
+						c = (c << 4) + *s - '0';
+						continue;
+					case '{':
+					case '[':
+						if (s != e)	/* only valid directly after the u or x */
+						{
+							s--;
+							break;
+						}
+						e = 0;
+						continue;
+					case '}':
+					case ']':
+						if (e)	/* not in a delimited escape: leave it for the outer loop */
+							s--;
+						break;
+					default:
+						s--;
+						break;
+					}
+					break;
+				}
+				*t = c;	/* stored in a char, so larger values are truncated */
+				break;
+			case '0': case '1': case '2': case '3':
+			case '4': case '5': case '6': case '7':
+				c = *s - '0';
+				q = s + 2;	/* at most three octal digits in total */
+				while (s < q)
+				{
+					switch (*++s)
+					{
+					case '0': case '1': case '2': case '3':
+					case '4': case '5': case '6': case '7':
+						c = (c << 3) + *s - '0';
+						break;
+					default:
+						q = --s;
+						break;
+					}
+				}
+				*t = c;
+				break;
+			default:
+				if (*s) *(s + 1) = 0;	/* show only the bad escape; a trailing \ must not write past the terminator */
+				bad("invalid C \\ escape\n", s - 1, NiL, 0, 0);
+			}
+	return t - b;
+}
+
+static void	/* print one match offset on stdout: X for -2, ? for -1, otherwise the number */
+matchoffprint(int off)
+{
+	/*
+	 * per the help text -2 marks an unset endpoint and -1 an
+	 * unspecified one
+	 */
+	if (off == -2)
+		printf("X");
+	else if (off == -1)
+		printf("?");
+	else
+	{
+		printf("%d", off);
+	}
+}
+
+static void	/* print the match array as (so,eo) pairs, then " expected: ans" and a newline unless listing */
+matchprint(regmatch_t* match, int nmatch, int nsub, char* ans, unsigned long test)
+{
+	int	k;
+
+	/* trim trailing entries beyond nsub + 1 that are (-1,-1) or, */
+	/* under TEST_IGNORE_POSITION, have a negative endpoint */
+	while (nmatch > nsub + 1 && ((match[nmatch-1].rm_so == -1 && match[nmatch-1].rm_eo == -1) || ((test & TEST_IGNORE_POSITION) && (match[nmatch-1].rm_so < 0 || match[nmatch-1].rm_eo < 0))))
+		nmatch--;
+	for (k = 0; k < nmatch; k++)
+	{
+		printf("(");
+		matchoffprint(match[k].rm_so);
+		printf(",");
+		matchoffprint(match[k].rm_eo);
+		printf(")");
+	}
+	if (test & (TEST_ACTUAL|TEST_BASELINE))
+		return;
+	if (ans)
+		printf(" expected: %s", ans);
+	printf("\n");
+}
+
+static int	/* compare match[] with the expected answer text ans; returns nonzero on agreement, 0 otherwise */
+matchcheck(regmatch_t* match, int nmatch, int nsub, char* ans, char* re, char* s, int len, int flags, unsigned long test)
+{
+	char*	p;	/* parse position in ans */
+	int	i;	/* index of the match[] entry being checked */
+	int	m;	/* expected rm_so */
+	int	n;	/* expected rm_eo */
+
+	if (streq(ans, "OK"))	/* OK: positions are not checked */
+		return test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY);
+	for (i = 0, p = ans; i < nmatch && *p; i++)
+	{
+		if (*p == '{')	/* callout trace: compared with the discipline stream, ends the position list */
+		{
+#ifdef REG_DISCIPLINE
+			char*	x;
+
+			x = sfstruse(state.disc.sp);
+			if (strcmp(p, x))
+			{
+				if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+					return 0;
+				report("callout failed", NiL, re, s, len, NiL, flags, test);
+				quote(p, -1, test);
+				printf(" expected, ");
+				quote(x, -1, test);
+				printf(" returned\n");
+			}
+#endif
+			break;
+		}
+		if (*p++ != '(')	/* each expected entry has the form (m,n) with ? standing for -1 */
+			bad("improper answer\n", re, s, -1, test);
+		if (*p == '?')
+		{
+			m = -1;
+			p++;
+		}
+		else
+			m = strtol(p, &p, 10);
+		if (*p++ != ',')
+			bad("improper answer\n", re, s, -1, test);
+		if (*p == '?')
+		{
+			n = -1;
+			p++;
+		}
+		else
+			n = strtol(p, &p, 10);
+		if (*p++ != ')')
+			bad("improper answer\n", re, s, -1, test);
+		if (m!=match[i].rm_so || n!=match[i].rm_eo)
+		{
+			if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)))	/* report only in the default mode */
+			{
+				report("failed: match was", NiL, re, s, len, NiL, flags, test);
+				matchprint(match, nmatch, nsub, ans, test);
+			}
+			return 0;
+		}
+	}
+	for (; i < nmatch; i++)	/* entries the answer does not mention must be (-1,-1) */
+	{
+		if (match[i].rm_so!=-1 || match[i].rm_eo!=-1)
+		{
+			if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_VERIFY)))
+			{
+				if ((test & TEST_IGNORE_POSITION) && (match[i].rm_so<0 || match[i].rm_eo<0))
+				{
+					state.ignored++;
+					return 0;
+				}
+				if (!(test & TEST_SUMMARY))
+				{
+					report("failed: match was", NiL, re, s, len, NiL, flags, test);
+					matchprint(match, nmatch, nsub, ans, test);
+				}
+			}
+			return 0;
+		}
+	}
+	if (!(test & TEST_IGNORE_OVER) && match[nmatch].rm_so != state.NOMATCH.rm_so)	/* the entry just past nmatch must still hold the sentinel */
+	{
+		if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY)))
+		{
+			report("failed: overran match array", NiL, re, s, len, NiL, flags, test);
+			matchprint(match, nmatch + 1, nsub, NiL, test);
+		}
+		return 0;
+	}
+	return 1;
+}
+
+static void	/* unblock signal s, or install an empty signal mask when s is 0 */
+sigunblock(int s)
+{
+#ifdef SIG_SETMASK
+	sigset_t	set;
+
+	/* an empty set is the whole new mask for SIG_SETMASK; with s added it is the set to unblock */
+	sigemptyset(&set);
+	if (!s)
+		sigprocmask(SIG_SETMASK, &set, NiL);
+	else
+	{
+		sigaddset(&set, s);
+		sigprocmask(SIG_UNBLOCK, &set, NiL);
+	}
+#else
+#ifdef sigmask
+	sigsetmask(s ? (sigsetmask(0L) & ~sigmask(s)) : 0L);
+#endif
+#endif
+}
+
+static void	/* signal handler: count the signal and longjmp() to state.gotcha with a REG_E* code */
+gotcha(int sig)
+{
+	int	code;
+
+	/* reinstall the handler and cancel any pending alarm first */
+	signal(sig, gotcha);
+	alarm(0);
+	state.signals++;
+	if (sig == SIGALRM)
+		code = REG_EHUNG;
+	else if (sig == SIGBUS)
+		code = REG_EBUS;
+	else
+		code = REG_EFAULT;
+	/*
+	 * the handler is left through longjmp() rather than by
+	 * returning, so sig is unblocked by hand first (presumably
+	 * because it may still be blocked while its handler runs)
+	 */
+	sigunblock(sig);
+	longjmp(state.gotcha, code);
+}
+
+static char*
+getline(FILE* fp)
+{
+	static char	buf[32 * 1024];
+
+	register char*	s = buf;
+	register char*	e = &buf[sizeof(buf)];
+	register char*	b;
+
+	for (;;)
+	{
+		if (!(b = fgets(s, e - s, fp)))
+			return 0;
+		state.lineno++;
+		s += strlen(s);
+		if (s == b || *--s != '\n' || s == b || *(s - 1) != '\\')
+		{
+			*s = 0;
+			break;
+		}
+		s--;
+	}
+	return buf;
+}
+
+static unsigned long	/* announce the start of a skipped region (no newline written); returns skip | level */
+note(unsigned long level, char* msg, unsigned long skip, unsigned long test)
+{
+	unsigned long	listing = test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY);
+	if (listing || skip)	/* silent in the listing modes and when already skipping */
+		return skip | level;
+	printf("NOTE\t");
+	if (msg)
+		printf("%s: ", msg);
+	printf("skipping lines %d", state.lineno);
+	return skip | level;
+}
+
+#define TABS(n)		&ts[7-((n)&7)]
+
+static char		ts[] = "\t\t\t\t\t\t\t";
+
+static unsigned long	/* account for one test result and, in the listing modes, echo the test line; returns the updated skip mask */
+extract(int* tabs, char* spec, char* re, char* s, char* ans, char* msg, char* accept, regmatch_t* match, int nmatch, int nsub, unsigned long skip, unsigned long level, unsigned long test)
+{
+	if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_OK|TEST_PASS|TEST_SUMMARY))
+	{
+		state.extracted = 1;
+		if (test & TEST_OK)
+		{
+			state.passed++;
+			if ((test & TEST_VERIFY) && !(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+			{
+				if (msg && strcmp(msg, "EXPECTED"))	/* a passed verify line prints its comment unless it is EXPECTED */
+					printf("NOTE\t%s\n", msg);
+				return skip;
+			}
+			test &= ~(TEST_PASS|TEST_QUERY);
+		}
+		if (test & (TEST_QUERY|TEST_VERIFY))
+		{
+			if (test & TEST_BASELINE)
+				test &= ~(TEST_BASELINE|TEST_PASS);
+			else
+				test |= TEST_PASS;
+			skip |= level;
+		}
+		if (!(test & TEST_OK))	/* same split between errors and unspecified as in report() */
+		{
+			if (test & TEST_UNSPECIFIED)
+				state.unspecified++;
+			else
+				state.errors++;
+		}
+		if (test & (TEST_PASS|TEST_SUMMARY))	/* counted only, nothing echoed */
+			return skip;
+		test &= ~TEST_DELIMIT;
+		printf("%s%s", spec, TABS(*tabs++));	/* each field is followed by the tab run TABS() builds from its tabs[] entry */
+		if ((test & (TEST_BASELINE|TEST_SAME)) == (TEST_BASELINE|TEST_SAME))
+			printf("SAME");
+		else
+			quote(re, -1, test);
+		printf("%s", TABS(*tabs++));
+		quote(s, -1, test);
+		printf("%s", TABS(*tabs++));
+		if (!(test & (TEST_ACTUAL|TEST_BASELINE)) || !accept && !match)	/* answer column: expected text, else accept, else the actual match array */
+			printf("%s", ans);
+		else if (accept)
+			printf("%s", accept);
+		else
+			matchprint(match, nmatch, nsub, NiL, test);
+		if (msg)
+			printf("%s%s", TABS(*tabs++), msg);
+		putchar('\n');
+	}
+	else if (test & TEST_QUERY)
+		skip = note(level, msg, skip, test);
+	else if (test & TEST_VERIFY)
+		state.extracted = 1;
+	return skip;
+}
+
+static int	/* regfree(preg), guarded by alarm()/setjmp() under TEST_CATCH; returns 0 or the code gotcha() delivered */
+catchfree(regex_t* preg, int flags, int* tabs, char* spec, char* re, char* s, char* ans, char* msg, char* accept, regmatch_t* match, int nmatch, int nsub, unsigned long skip, unsigned long level, unsigned long test)
+{
+	int	eret;
+
+	if (!(test & TEST_CATCH))
+	{
+		regfree(preg);
+		eret = 0;
+	}
+	else if (!(eret = setjmp(state.gotcha)))	/* direct return from setjmp(): make the guarded call */
+	{
+		alarm(HUNG);	/* SIGALRM after HUNG seconds if regfree() does not return */
+		regfree(preg);
+		alarm(0);
+	}
+	else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))	/* reached via longjmp() from gotcha() */
+		extract(tabs, spec, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
+	else
+	{
+		report("failed", "regfree", re, NiL, -1, msg, flags, test);
+		error(preg, eret);
+	}
+	return eret;
+}
+
+int
+main(int argc, char** argv)
+{
+	int		flags;
+	int		cflags;
+	int		eflags;
+	int		nmatch;
+	int		nexec;
+	int		nstr;
+	int		cret;
+	int		eret;
+	int		nsub;
+	int		i;
+	int		j;
+	int		expected;
+	int		got;
+	int		locale;
+	int		subunitlen;
+	int		testno;
+	unsigned long	level;
+	unsigned long	skip;
+	char*		p;
+	char*		line;
+	char*		spec;
+	char*		re;
+	char*		s;
+	char*		ans;
+	char*		msg;
+	char*		fun;
+	char*		ppat;
+	char*		subunit;
+	char*		version;
+	char*		field[6];
+	char*		delim[6];
+	FILE*		fp;
+	int		tabs[6];
+	char		unit[64];
+	regmatch_t	match[100];
+	regex_t		preg;
+
+	static char	pat[32 * 1024];
+
+	int		nonosub = REG_NOSUB == 0;
+	int		nonexec = 0;
+
+	unsigned long	test = 0;
+
+	static char*	filter[] = { "-", 0 };
+
+	state.NOMATCH.rm_so = state.NOMATCH.rm_eo = -2;
+	p = unit;
+	version = (char*)id + 10;
+	while (p < &unit[sizeof(unit)-1] && (*p = *version++) && !isspace(*p))
+		p++;
+	*p = 0;
+	while ((p = *++argv) && *p == '-')
+		for (;;)
+		{
+			switch (*++p)
+			{
+			case 0:
+				break;
+			case 'c':
+				test |= TEST_CATCH;
+				continue;
+			case 'e':
+				test |= TEST_IGNORE_ERROR;
+				continue;
+			case 'h':
+			case '?':
+				help(0);
+				return 2;
+			case '-':
+				help(p[1] == 'h');
+				return 2;
+			case 'n':
+				nonexec = 1;
+				continue;
+			case 'o':
+				test |= TEST_IGNORE_OVER;
+				continue;
+			case 'p':
+				test |= TEST_IGNORE_POSITION;
+				continue;
+			case 's':
+#ifdef REG_DISCIPLINE
+				if (!(state.stack = stkalloc(stkstd, 0)))
+					fprintf(stderr, "%s: out of space [stack]", unit);
+				state.disc.disc.re_resizef = resizef;
+				state.disc.disc.re_resizehandle = (void*)stkstd;
+#endif
+				continue;
+			case 'x':
+				nonosub = 1;
+				continue;
+			case 'v':
+				test |= TEST_VERBOSE;
+				continue;
+			case 'A':
+				test |= TEST_ACTUAL;
+				continue;
+			case 'B':
+				test |= TEST_BASELINE;
+				continue;
+			case 'F':
+				test |= TEST_FAIL;
+				continue;
+			case 'P':
+				test |= TEST_PASS;
+				continue;
+			case 'S':
+				test |= TEST_SUMMARY;
+				continue;
+			default:
+				fprintf(stderr, "%s: %c: invalid option\n", unit, *p);
+				return 2;
+			}
+			break;
+		}
+	if (!*argv)
+		argv = filter;
+	locale = 0;
+	while (state.file = *argv++)
+	{
+		if (streq(state.file, "-") || streq(state.file, "/dev/stdin") || streq(state.file, "/dev/fd/0"))
+		{
+			state.file = 0;
+			fp = stdin;
+		}
+		else if (!(fp = fopen(state.file, "r")))
+		{
+			fprintf(stderr, "%s: %s: cannot read\n", unit, state.file);
+			return 2;
+		}
+		testno = state.errors = state.ignored = state.lineno = state.passed =
+		state.signals = state.unspecified = state.warnings = 0;
+		skip = 0;
+		level = 1;
+		if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+		{
+			printf("TEST\t%s ", unit);
+			if (s = state.file)
+			{
+				subunit = p = 0;
+				for (;;)
+				{
+					switch (*s++)
+					{
+					case 0:
+						break;
+					case '/':
+						subunit = s;
+						continue;
+					case '.':
+						p = s - 1;
+						continue;
+					default:
+						continue;
+					}
+					break;
+				}
+				if (!subunit)
+					subunit = state.file;
+				if (p < subunit)
+					p = s - 1;
+				subunitlen = p - subunit;
+				printf("%-.*s ", subunitlen, subunit);
+			}
+			else
+				subunit = 0;
+			for (s = version; *s && (*s != ' ' || *(s + 1) != '$'); s++)
+				putchar(*s);
+			if (test & TEST_CATCH)
+				printf(", catch");
+			if (test & TEST_IGNORE_ERROR)
+				printf(", ignore error code mismatches");
+			if (test & TEST_IGNORE_POSITION)
+				printf(", ignore negative position mismatches");
+#ifdef REG_DISCIPLINE
+			if (state.stack)
+				printf(", stack");
+#endif
+			if (test & TEST_VERBOSE)
+				printf(", verbose");
+			printf("\n");
+#ifdef REG_VERSIONID
+			if (regerror(REG_VERSIONID, NiL, pat, sizeof(pat)) > 0)
+				s = pat;
+			else
+#endif
+#ifdef REG_TEST_VERSION
+			s = REG_TEST_VERSION;
+#else
+			s = "regex";
+#endif
+			printf("NOTE\t%s\n", s);
+			if (elementsof(unsupported) > 1)
+			{
+#if (REG_TEST_DEFAULT & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL)) || !defined(REG_EXTENDED)
+				i = 0;
+#else
+				i = REG_EXTENDED != 0;
+#endif
+				for (got = 0; i < elementsof(unsupported) - 1; i++)
+				{
+					if (!got)
+					{
+						got = 1;
+						printf("NOTE\tunsupported: %s", unsupported[i]);
+					}
+					else
+						printf(",%s", unsupported[i]);
+				}
+				if (got)
+					printf("\n");
+			}
+		}
+#ifdef REG_DISCIPLINE
+		state.disc.disc.re_version = REG_VERSION;
+		state.disc.disc.re_compf = compf;
+		state.disc.disc.re_execf = execf;
+		if (!(state.disc.sp = sfstropen()))
+			bad("out of space [discipline string stream]\n", NiL, NiL, 0, 0);
+		preg.re_disc = &state.disc.disc;
+#endif
+		if (test & TEST_CATCH)
+		{
+			signal(SIGALRM, gotcha);
+			signal(SIGBUS, gotcha);
+			signal(SIGSEGV, gotcha);
+		}
+		while (p = getline(fp))
+		{
+
+		/* parse: */
+
+			line = p;
+			if (*p == ':' && !isspace(*(p + 1)))
+			{
+				while (*++p && *p != ':');
+				if (!*p++)
+				{
+					if (test & TEST_BASELINE)
+						printf("%s\n", line);
+					continue;
+				}
+			}
+			while (isspace(*p))
+				p++;
+			if (*p == 0 || *p == '#' || *p == 'T')
+			{
+				if (test & TEST_BASELINE)
+					printf("%s\n", line);
+				continue;
+			}
+			if (*p == ':' || *p == 'N')
+			{
+				if (test & TEST_BASELINE)
+					printf("%s\n", line);
+				else if (!(test & (TEST_ACTUAL|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+				{
+					while (*++p && !isspace(*p));
+					while (isspace(*p))
+						p++;
+					printf("NOTE	%s\n", p);
+				}
+				continue;
+			}
+			j = 0;
+			i = 0;
+			field[i++] = p;
+			for (;;)
+			{
+				switch (*p++)
+				{
+				case 0:
+					p--;
+					j = 0;
+					goto checkfield;
+				case '\t':
+					*(delim[i] = p - 1) = 0;
+					j = 1;
+				checkfield:
+					s = field[i - 1];
+					if (streq(s, "NIL"))
+						field[i - 1] = 0;
+					else if (streq(s, "NULL"))
+						*s = 0;
+					while (*p == '\t')
+					{
+						p++;
+						j++;
+					}
+					tabs[i - 1] = j;
+					if (!*p)
+						break;
+					if (i >= elementsof(field))
+						bad("too many fields\n", NiL, NiL, 0, 0);
+					field[i++] = p;
+					/*FALLTHROUGH*/
+				default:
+					continue;
+				}
+				break;
+			}
+			if (!(spec = field[0]))
+				bad("NIL spec\n", NiL, NiL, 0, 0);
+
+		/* interpret: */
+
+			cflags = REG_TEST_DEFAULT;
+			eflags = REG_EXEC_DEFAULT;
+			test &= TEST_GLOBAL;
+			state.extracted = 0;
+			nmatch = 20;
+			nsub = -1;
+			for (p = spec; *p; p++)
+			{
+				if (isdigit(*p))
+				{
+					nmatch = strtol(p, &p, 10);
+					if (nmatch >= elementsof(match))
+						bad("nmatch must be < 100\n", NiL, NiL, 0, 0);
+					p--;
+					continue;
+				}
+				switch (*p)
+				{
+				case 'A':
+					test |= TEST_ARE;
+					continue;
+				case 'B':
+					test |= TEST_BRE;
+					continue;
+				case 'C':
+					if (!(test & TEST_QUERY) && !(skip & level))
+						bad("locale must be nested\n", NiL, NiL, 0, 0);
+					test &= ~TEST_QUERY;
+					if (locale)
+						bad("locale nesting not supported\n", NiL, NiL, 0, 0);
+					if (i != 2)
+						bad("locale field expected\n", NiL, NiL, 0, 0);
+					if (!(skip & level))
+					{
+#if defined(LC_COLLATE) && defined(LC_CTYPE)
+						s = field[1];
+						if (!s || streq(s, "POSIX"))
+							s = "C";
+						if (!(ans = setlocale(LC_COLLATE, s)) || streq(ans, "C") || streq(ans, "POSIX") || !(ans = setlocale(LC_CTYPE, s)) || streq(ans, "C") || streq(ans, "POSIX"))
+							skip = note(level, s, skip, test);
+						else
+						{
+							if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+								printf("NOTE	\"%s\" locale\n", s);
+							locale = level;
+						}
+#else
+						skip = note(level, skip, test, "locales not supported");
+#endif
+					}
+					cflags = NOTEST;
+					continue;
+				case 'E':
+					test |= TEST_ERE;
+					continue;
+				case 'K':
+					test |= TEST_KRE;
+					continue;
+				case 'L':
+					test |= TEST_LRE;
+					continue;
+				case 'S':
+					test |= TEST_SRE;
+					continue;
+
+				case 'a':
+					cflags |= REG_LEFT|REG_RIGHT;
+					continue;
+				case 'b':
+					eflags |= REG_NOTBOL;
+					continue;
+				case 'c':
+					cflags |= REG_COMMENT;
+					continue;
+				case 'd':
+					cflags |= REG_SHELL_DOT;
+					continue;
+				case 'e':
+					eflags |= REG_NOTEOL;
+					continue;
+				case 'f':
+					cflags |= REG_MULTIPLE;
+					continue;
+				case 'g':
+					cflags |= NOTEST;
+					continue;
+				case 'h':
+					cflags |= REG_MULTIREF;
+					continue;
+				case 'i':
+					cflags |= REG_ICASE;
+					continue;
+				case 'j':
+					cflags |= REG_SPAN;
+					continue;
+				case 'k':
+					cflags |= REG_ESCAPE;
+					continue;
+				case 'l':
+					cflags |= REG_LEFT;
+					continue;
+				case 'm':
+					cflags |= REG_MINIMAL;
+					continue;
+				case 'n':
+					cflags |= REG_NEWLINE;
+					continue;
+				case 'o':
+					cflags |= REG_SHELL_GROUP;
+					continue;
+				case 'p':
+					cflags |= REG_SHELL_PATH;
+					continue;
+				case 'q':
+					cflags |= REG_DELIMITED;
+					continue;
+				case 'r':
+					cflags |= REG_RIGHT;
+					continue;
+				case 's':
+					cflags |= REG_SHELL_ESCAPED;
+					continue;
+				case 't':
+					cflags |= REG_MUSTDELIM;
+					continue;
+				case 'u':
+					test |= TEST_UNSPECIFIED;
+					continue;
+				case 'w':
+					cflags |= REG_NOSUB;
+					continue;
+				case 'x':
+					if (REG_LENIENT)
+						cflags |= REG_LENIENT;
+					else
+						test |= TEST_LENIENT;
+					continue;
+				case 'y':
+					eflags |= REG_LEFT;
+					continue;
+				case 'z':
+					cflags |= REG_NULL;
+					continue;
+
+				case '$':
+					test |= TEST_EXPAND;
+					continue;
+
+				case '/':
+					test |= TEST_SUB;
+					continue;
+
+				case '?':
+					test |= TEST_VERIFY;
+					test &= ~(TEST_AND|TEST_OR);
+					state.verify = state.passed;
+					continue;
+				case '&':
+					test |= TEST_VERIFY|TEST_AND;
+					test &= ~TEST_OR;
+					continue;
+				case '|':
+					test |= TEST_VERIFY|TEST_OR;
+					test &= ~TEST_AND;
+					continue;
+				case ';':
+					test |= TEST_OR;
+					test &= ~TEST_AND;
+					continue;
+
+				case '{':
+					level <<= 1;
+					if (skip & (level >> 1))
+					{
+						skip |= level;
+						cflags = NOTEST;
+					}
+					else
+					{
+						skip &= ~level;
+						test |= TEST_QUERY;
+					}
+					continue;
+				case '}':
+					if (level == 1)
+						bad("invalid {...} nesting\n", NiL, NiL, 0, 0);
+					if ((skip & level) && !(skip & (level>>1)))
+					{
+						if (!(test & (TEST_BASELINE|TEST_SUMMARY)))
+						{
+							if (test & (TEST_ACTUAL|TEST_FAIL))
+								printf("}\n");
+							else if (!(test & TEST_PASS))
+								printf("-%d\n", state.lineno);
+						}
+					}
+#if defined(LC_COLLATE) && defined(LC_CTYPE)
+					else if (locale & level)
+					{
+						locale = 0;
+						if (!(skip & level))
+						{
+							s = "C";
+							setlocale(LC_COLLATE, s);
+							setlocale(LC_CTYPE, s);
+							if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_SUMMARY)))
+								printf("NOTE	\"%s\" locale\n", s);
+							else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_PASS))
+								printf("}\n");
+						}
+						else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL))
+							printf("}\n");
+					}
+#endif
+					level >>= 1;
+					cflags = NOTEST;
+					continue;
+
+				default:
+					bad("bad spec\n", spec, NiL, 0, test);
+					break;
+
+				}
+				break;
+			}
+			if ((cflags|eflags) == NOTEST || (skip & level) && (test & TEST_BASELINE))
+			{
+				if (test & TEST_BASELINE)
+				{
+					while (i > 1)
+						*delim[--i] = '\t';
+					printf("%s\n", line);
+				}
+				continue;
+			}
+			if (test & TEST_OR)
+			{
+				if (!(test & TEST_VERIFY))
+				{
+					test &= ~TEST_OR;
+					if (state.passed == state.verify && i > 1)
+						printf("NOTE\t%s\n", field[1]);
+					continue;
+				}
+				else if (state.passed > state.verify)
+					continue;
+			}
+			else if (test & TEST_AND)
+			{
+				if (state.passed == state.verify)
+					continue;
+				state.passed = state.verify;
+			}
+			if (i < 4)
+				bad("too few fields\n", NiL, NiL, 0, test);
+			while (i < elementsof(field))
+				field[i++] = 0;
+			if (re = field[1])
+			{
+				if (streq(re, "SAME"))
+				{
+					re = ppat;
+					test |= TEST_SAME;
+				}
+				else
+				{
+					if (test & TEST_EXPAND)
+						escape(re);
+					strcpy(ppat = pat, re);
+				}
+			}
+			else
+				ppat = 0;
+			nstr = -1;
+			if ((s = field[2]) && (test & TEST_EXPAND))
+			{
+				nstr = escape(s);
+#if _REG_nexec
+				if (nstr != strlen(s))
+					nexec = nstr;
+#endif
+			}
+			if (!(ans = field[3]))
+				bad("NIL answer\n", NiL, NiL, 0, test);
+			msg = field[4];
+			fflush(stdout);
+			if (test & TEST_SUB)
+#if _REG_subcomp
+				cflags |= REG_DELIMITED;
+#else
+				continue;
+#endif
+
+		compile:
+
+			if (state.extracted || (skip & level))
+				continue;
+#if !(REG_TEST_DEFAULT & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
+#ifdef REG_EXTENDED
+			if (REG_EXTENDED != 0 && (test & TEST_BRE))
+#else
+			if (test & TEST_BRE)
+#endif
+			{
+				test &= ~TEST_BRE;
+				flags = cflags;
+				state.which = "BRE";
+			}
+			else
+#endif
+#ifdef REG_EXTENDED
+			if (test & TEST_ERE)
+			{
+				test &= ~TEST_ERE;
+				flags = cflags | REG_EXTENDED;
+				state.which = "ERE";
+			}
+			else
+#endif
+#ifdef REG_AUGMENTED
+			if (test & TEST_ARE)
+			{
+				test &= ~TEST_ARE;
+				flags = cflags | REG_AUGMENTED;
+				state.which = "ARE";
+			}
+			else
+#endif
+#ifdef REG_LITERAL
+			if (test & TEST_LRE)
+			{
+				test &= ~TEST_LRE;
+				flags = cflags | REG_LITERAL;
+				state.which = "LRE";
+			}
+			else
+#endif
+#ifdef REG_SHELL
+			if (test & TEST_SRE)
+			{
+				test &= ~TEST_SRE;
+				flags = cflags | REG_SHELL;
+				state.which = "SRE";
+			}
+			else
+#ifdef REG_AUGMENTED
+			if (test & TEST_KRE)
+			{
+				test &= ~TEST_KRE;
+				flags = cflags | REG_SHELL | REG_AUGMENTED;
+				state.which = "KRE";
+			}
+			else
+#endif
+#endif
+			{
+				if (test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY))
+					extract(tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test|TEST_OK);
+				continue;
+			}
+			if ((test & (TEST_QUERY|TEST_VERBOSE|TEST_VERIFY)) == TEST_VERBOSE)
+			{
+				printf("test %-3d %s ", state.lineno, state.which);
+				quote(re, -1, test|TEST_DELIMIT);
+				printf(" ");
+				quote(s, nstr, test|TEST_DELIMIT);
+				printf("\n");
+			}
+
+		nosub:
+			fun = "regcomp";
+#if _REG_nexec
+			if (nstr >= 0 && nstr != strlen(s))
+				nexec = nstr;
+
+			else
+#endif
+				nexec = -1;
+			if (state.extracted || (skip & level))
+				continue;
+			if (!(test & TEST_QUERY))
+				testno++;
+#ifdef REG_DISCIPLINE
+			if (state.stack)
+				stkset(stkstd, state.stack, 0);
+			flags |= REG_DISCIPLINE;
+			state.disc.ordinal = 0;
+			sfstrseek(state.disc.sp, 0, SEEK_SET);
+#endif
+			if (!(test & TEST_CATCH))
+				cret = regcomp(&preg, re, flags);
+			else if (!(cret = setjmp(state.gotcha)))
+			{
+				alarm(HUNG);
+				cret = regcomp(&preg, re, flags);
+				alarm(0);
+			}
+#if _REG_subcomp
+			if (!cret && (test & TEST_SUB))
+			{
+				fun = "regsubcomp";
+				p = re + preg.re_npat;
+				if (!(test & TEST_CATCH))
+					cret = regsubcomp(&preg, p, NiL, 0, 0);
+				else if (!(cret = setjmp(state.gotcha)))
+				{
+					alarm(HUNG);
+					cret = regsubcomp(&preg, p, NiL, 0, 0);
+					alarm(0);
+				}
+				if (!cret && *(p += preg.re_npat) && !(preg.re_sub->re_flags & REG_SUB_LAST))
+				{
+					if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
+						continue;
+					cret = REG_EFLAGS;
+				}
+			}
+#endif
+			if (!cret)
+			{
+				if (!(flags & REG_NOSUB) && nsub < 0 && *ans == '(')
+				{
+					for (p = ans; *p; p++)
+						if (*p == '(')
+							nsub++;
+						else if (*p == '{')
+							nsub--;
+					if (nsub >= 0)
+					{
+						if (test & TEST_IGNORE_OVER)
+						{
+							if (nmatch > nsub)
+								nmatch = nsub + 1;
+						}
+						else if (nsub != preg.re_nsub)
+						{
+							if (nsub > preg.re_nsub)
+							{
+								if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+									skip = extract(tabs, line, re, s, ans, msg, "OK", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+								else
+								{
+									report("re_nsub incorrect", fun, re, NiL, -1, msg, flags, test);
+									printf("at least %d expected, %d returned\n", nsub, preg.re_nsub);
+									state.errors++;
+								}
+							}
+							else
+								nsub = preg.re_nsub;
+						}
+					}
+				}
+				if (!(test & TEST_SUB) && *ans && *ans != '(' && !streq(ans, "OK") && !streq(ans, "NOMATCH"))
+				{
+					if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+						skip = extract(tabs, line, re, s, ans, msg, "OK", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+					else if (!(test & TEST_LENIENT))
+					{
+						report("failed", fun, re, NiL, -1, msg, flags, test);
+						printf("%s expected, OK returned\n", ans);
+					}
+					catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
+					continue;
+				}
+			}
+			else
+			{
+				if (test & TEST_LENIENT)
+					/* we'll let it go this time */;
+				else if (!*ans || ans[0]=='(' || cret == REG_BADPAT && streq(ans, "NOMATCH"))
+				{
+					got = 0;
+					for (i = 1; i < elementsof(codes); i++)
+						if (cret==codes[i].code)
+							got = i;
+					if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+						skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+					else
+					{
+						report("failed", fun, re, NiL, -1, msg, flags, test);
+						printf("%s returned: ", codes[got].name);
+						error(&preg, cret);
+					}
+				}
+				else
+				{
+					expected = got = 0;
+					for (i = 1; i < elementsof(codes); i++)
+					{
+						if (streq(ans, codes[i].name))
+							expected = i;
+						if (cret==codes[i].code)
+							got = i;
+					}
+					if (!expected)
+					{
+						if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+							skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+						else
+						{
+							report("failed: invalid error code", NiL, re, NiL, -1, msg, flags, test);
+							printf("%s expected, %s returned\n", ans, codes[got].name);
+						}
+					}
+					else if (cret != codes[expected].code && cret != REG_BADPAT)
+					{
+						if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+							skip = extract(tabs, line, re, s, ans, msg, codes[got].name, NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+						else if (test & TEST_IGNORE_ERROR)
+							state.ignored++;
+						else
+						{
+							report("should fail and did", fun, re, NiL, -1, msg, flags, test);
+							printf("%s expected, %s returned: ", ans, codes[got].name);
+							state.errors--;
+							state.warnings++;
+							error(&preg, cret);
+						}
+					}
+				}
+				goto compile;
+			}
+
+#if _REG_nexec
+		execute:
+			if (nexec >= 0)
+				fun = "regnexec";
+			else
+#endif
+				fun = "regexec";
+			
+			for (i = 0; i < elementsof(match); i++)
+				match[i] = state.NOMATCH;
+
+#if _REG_nexec
+			if (nexec >= 0)
+			{
+				eret = regnexec(&preg, s, nexec, nmatch, match, eflags);
+				s[nexec] = 0;
+			}
+			else
+#endif
+			{
+				if (!(test & TEST_CATCH))
+					eret = regexec(&preg, s, nmatch, match, eflags);
+				else if (!(eret = setjmp(state.gotcha)))
+				{
+					alarm(HUNG);
+					eret = regexec(&preg, s, nmatch, match, eflags);
+					alarm(0);
+				}
+			}
+#if _REG_subcomp
+			if ((test & TEST_SUB) && !eret)
+			{
+				fun = "regsubexec";
+				if (!(test & TEST_CATCH))
+					eret = regsubexec(&preg, s, nmatch, match);
+				else if (!(eret = setjmp(state.gotcha)))
+				{
+					alarm(HUNG);
+					eret = regsubexec(&preg, s, nmatch, match);
+					alarm(0);
+				}
+			}
+#endif
+			if (flags & REG_NOSUB)
+			{
+				if (eret)
+				{
+					if (eret != REG_NOMATCH || !streq(ans, "NOMATCH"))
+					{
+						if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+							skip = extract(tabs, line, re, s, ans, msg, "NOMATCH", NiL, 0, 0, skip, level, test|TEST_DELIMIT);
+						else
+						{
+							report("REG_NOSUB failed", fun, re, s, nstr, msg, flags, test);
+							error(&preg, eret);
+						}
+					}
+				}
+				else if (streq(ans, "NOMATCH"))
+				{
+					if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+						skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
+					else
+					{
+						report("should fail and didn't", fun, re, s, nstr, msg, flags, test);
+						error(&preg, eret);
+					}
+				}
+			}
+			else if (eret)
+			{
+				if (eret != REG_NOMATCH || !streq(ans, "NOMATCH"))
+				{
+					if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+						skip = extract(tabs, line, re, s, ans, msg, "NOMATCH", NiL, 0, nsub, skip, level, test|TEST_DELIMIT);
+					else
+					{
+						report("failed", fun, re, s, nstr, msg, flags, test);
+						if (eret != REG_NOMATCH)
+							error(&preg, eret);
+						else if (*ans)
+							printf("expected: %s\n", ans);
+						else
+							printf("\n");
+					}
+				}
+			}
+			else if (streq(ans, "NOMATCH"))
+			{
+				if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+					skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
+				else
+				{
+					report("should fail and didn't", fun, re, s, nstr, msg, flags, test);
+					matchprint(match, nmatch, nsub, NiL, test);
+				}
+			}
+#if _REG_subcomp
+			else if (test & TEST_SUB)
+			{
+				p = preg.re_sub->re_buf;
+				if (strcmp(p, ans))
+				{
+					report("failed", fun, re, s, nstr, msg, flags, test);
+					quote(ans, -1, test|TEST_DELIMIT);
+					printf(" expected, ");
+					quote(p, -1, test|TEST_DELIMIT);
+					printf(" returned\n");
+				}
+			}
+#endif
+			else if (!*ans)
+			{
+				if (match[0].rm_so != state.NOMATCH.rm_so)
+				{
+					if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+						skip = extract(tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test);
+					else
+					{
+						report("failed: no match but match array assigned", NiL, re, s, nstr, msg, flags, test);
+						matchprint(match, nmatch, nsub, NiL, test);
+					}
+				}
+			}
+			else if (matchcheck(match, nmatch, nsub, ans, re, s, nstr, flags, test))
+			{
+#if _REG_nexec
+				if (nexec < 0 && !nonexec)
+				{
+					nexec = nstr >= 0 ? nstr : strlen(s);
+					s[nexec] = '\n';
+					testno++;
+					goto execute;
+				}
+#endif
+				if (!(test & (TEST_SUB|TEST_VERIFY)) && !nonosub)
+				{
+					if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
+						continue;
+					flags |= REG_NOSUB;
+					goto nosub;
+				}
+				if (test & (TEST_BASELINE|TEST_PASS|TEST_VERIFY))
+					skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_OK);
+			}
+			else if (test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS|TEST_QUERY|TEST_SUMMARY|TEST_VERIFY))
+				skip = extract(tabs, line, re, s, ans, msg, NiL, match, nmatch, nsub, skip, level, test|TEST_DELIMIT);
+			if (catchfree(&preg, flags, tabs, line, re, s, ans, msg, NiL, NiL, 0, 0, skip, level, test))
+				continue;
+			goto compile;
+		}
+		if (test & TEST_SUMMARY)
+			printf("tests=%-4d errors=%-4d warnings=%-2d ignored=%-2d unspecified=%-2d signals=%d\n", testno, state.errors, state.warnings, state.ignored, state.unspecified, state.signals);
+		else if (!(test & (TEST_ACTUAL|TEST_BASELINE|TEST_FAIL|TEST_PASS)))
+		{
+			printf("TEST\t%s", unit);
+			if (subunit)
+				printf(" %-.*s", subunitlen, subunit);
+			printf(", %d test%s", testno, testno == 1 ? "" : "s");
+			if (state.ignored)
+				printf(", %d ignored mismatche%s", state.ignored, state.ignored == 1 ? "" : "s");
+			if (state.warnings)
+				printf(", %d warning%s", state.warnings, state.warnings == 1 ? "" : "s");
+			if (state.unspecified)
+				printf(", %d unspecified difference%s", state.unspecified, state.unspecified == 1 ? "" : "s");
+			if (state.signals)
+				printf(", %d signal%s", state.signals, state.signals == 1 ? "" : "s");
+			printf(", %d error%s\n", state.errors, state.errors == 1 ? "" : "s");
+		}
+		if (fp != stdin)
+			fclose(fp);
+	}
+	return 0;
+}
diff --git a/testregex.html b/testregex.html
new file mode 100644
index 0000000..da80180
--- /dev/null
+++ b/testregex.html
@@ -0,0 +1,241 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/REC-html40/frameset.dtd">
+<HTML>
+<HEAD>
+<META name="generator" content="mm2html (AT&amp;T Research) 2010-09-10">
+<META name="keywords" content="regular expression pattern match regression test">
+<TITLE> ../re/testregex.mm mm document </TITLE>
+<META name="author" content="gsf">
+</HEAD>
+<BODY bgcolor=white link=slateblue vlink=teal >
+<TABLE border=0 align=center width=96%>
+<TBODY><TR><TD valign=top align=left>
+<!--INDEX--><!--/INDEX-->
+<B><FONT size=-1 face="verdana,arial,helvetica,geneva,sans-serif">
+<TABLE align=center cellpadding=2 border=4 bgcolor=lightgrey><TR>
+<TD><A href="testregex.html#Reference Implementations">Reference Implementations</A></TD>
+<TD><A href="testregex.html#Test Data Repository">Test Data Repository</A></TD>
+<TD><A href="testregex.html#Usage">Usage</A></TD>
+<TD><A href="testregex.html#Reference Implementation Notes">Reference Implementation Notes</A></TD>
+<TD><A href="testregex.html#testregex Notes">testregex Notes</A></TD>
+</TR></TABLE>
+</FONT></B>
+<P>
+<HR>
+<CENTER>
+<H3><CENTER><FONT color=red><FONT face=courier>AT&amp;T Research regex(3) regression tests</FONT></FONT></CENTER></H3>
+<BR>Glenn Fowler <SMALL>&lt;<A href=mailto:gsf@research.att.com>gsf@research.att.com</A>&gt;</SMALL>
+<P><I>AT&amp;T Research - Florham Park NJ</I>
+</CENTER>
+<P><HR><P>
+	<A href="testregex.c">testregex.c 2004-05-31</A>
+is the latest source for the AT&amp;T Research regression test
+harness for the
+	<A href="http://www.opengroup.org/onlinepubs/007904975/functions/regcomp.html" target=_top>X/Open regex</A>
+pattern match interface.
+See
+<NOBR><A href="man/man1/testregex.html"><STRONG>testregex</STRONG></A>(1)</NOBR>
+for option and test input details.
+The source and test data posted here are license free.
+<P>
+<STRONG>testregex</STRONG>
+can:
+<UL type=square>
+<LI>
+verify stability for a particular implementation in the face of
+source code and/or compilation environment changes
+<LI>
+verify standard compliance for all implementations
+<LI>
+provide a basis for discussions on what
+<EM>compliance</EM>
+means
+</UL>
+<P>
+See
+	<A href="re-interpretation.html">An Interpretation of the POSIX regex Standards</A>
+for an analysis of the POSIX-X/Open
+<STRONG>regex</STRONG>
+standards.
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="Reference Implementations">Reference Implementations</A></H3></FONT></FONT></CENTER>
+<STRONG>testregex</STRONG>
+is currently built against these reference implementations:
+<P></P><TABLE border=0 frame=void rules=none width=100%><TBODY><TR><TD>
+<TABLE align=center bgcolor=papayawhip border=0 bordercolor=white cellpadding=2 cellspacing=2 frame=void rules=none >
+<TBODY>
+<TR><TD align=right>NAME&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;LABEL&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;AUTHORS</TD></TR>
+<TR><TD align=right>
+AT&amp;T ast&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://www.research.att.com/sw/download/" target=_top>A</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;Glenn Fowler and Doug McIlroy</TD></TR>
+<TR><TD align=right>
+bsd&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="ftp://ftp.netbsd.org/pub/NetBSD/NetBSD-1.5.2/source/sets/src.tgz" target=_top>B</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;</TD></TR>
+<TR><TD align=right>
+Bell Labs&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://www.bell-labs.com/" target=_top>D</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;Doug McIlroy</TD></TR>
+<TR><TD align=right>
+old gnu&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://www.gnu.org" target=_top>G</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;</TD></TR>
+<TR><TD align=right>
+gnu&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://www.gnu.org" target=_top>H</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;Isamu Hasegawa</TD></TR>
+<TR><TD align=right>
+irix&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://www.sgi.com" target=_top>I</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;</TD></TR>
+<TR><TD align=right>
+boost&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://www.boost.org/libs/regex/" target=_top>J</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;John Maddock</TD></TR>
+<TR><TD align=right>
+regex++&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://ourworld.compuserve.com/homepages/John_Maddock/regexpp.htm" target=_top>M</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;John Maddock</TD></TR>
+<TR><TD align=right>
+pcre perl compatible&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://www.pcre.org/" target=_top>P</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;Philip Hazel</TD></TR>
+<TR><TD align=right>
+rx&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="ftp://regexps.com/pub/src/hackerlab/" target=_top>R</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;Tom Lord</TD></TR>
+<TR><TD align=right>
+spencer&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://arglist.com/regex/rxspencer-alpha3.8.g2.tar.gz" target=_top>S</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;Henry Spencer</TD></TR>
+<TR><TD align=right>
+libtre&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://kouli.iki.fi/~vlaurika/libtre/" target=_top>T</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;Ville Laurikari</TD></TR>
+<TR><TD align=right>
+unix caldera&nbsp;&nbsp;</TD><TD align=center>&nbsp;&nbsp;<A href="http://unixtools.sourceforge.net/" target=_top>U</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;</TD></TR>
+</TBODY></TABLE></TD></TR></TBODY></TABLE>
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="Test Data Repository">Test Data Repository</A></H3></FONT></FONT></CENTER>
+<P></P><TABLE border=0 frame=void rules=none width=100%><TBODY><TR><TD>
+<TABLE align=center bgcolor=papayawhip border=0 bordercolor=white cellpadding=2 cellspacing=2 frame=void rules=none >
+<TBODY>
+<TR><TD align=right>
+<A href="basic.dat">basic.dat</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;&nbsp;basic regex(3) -- all implementations should pass these</TD></TR>
+<TR><TD align=right>
+<A href="categorize.dat">categorize.dat</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;&nbsp;<A href="re-categorize.html">implementation categorization</A></TD></TR>
+<TR><TD align=right>
+<A href="nullsubexpr.dat">nullsubexpr.dat</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;&nbsp;<A href="re-nullsubexpr.html">null (...)* tests</A></TD></TR>
+<TR><TD align=right>
+<A href="leftassoc.dat">leftassoc.dat</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;&nbsp;<A href="re-assoc.html">left associative catenation implementation must pass these</A></TD></TR>
+<TR><TD align=right>
+<A href="rightassoc.dat">rightassoc.dat</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;&nbsp;<A href="re-assoc.html">right associative catenation implementation must pass these</A></TD></TR>
+<TR><TD align=right>
+<A href="forcedassoc.dat">forcedassoc.dat</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;&nbsp;<A href="re-assoc.html">subexpression grouping to force associativity</A></TD></TR>
+<TR><TD align=right>
+<A href="repetition.dat">repetition.dat</A>&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;&nbsp;&nbsp;<A href="re-repetition.html">explicit vs. implicit repetitions</A></TD></TR>
+</TBODY></TABLE></TD></TR></TBODY></TABLE>
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="Usage">Usage</A></H3></FONT></FONT></CENTER>
+To run the
+<STRONG>basic.dat</STRONG>
+tests:
+<DIV style="padding-left:16px;text-indent:0px">
+<PRE>
+testregex &lt; basic.dat
+</PRE>
+</DIV>
+<P>
+If the local implementation hangs or dumps core on some tests then run with
+the <STRONG>-c</STRONG> option.
+The <STRONG>-h</STRONG> option lists the test data format details.
+The test data files exercise all features;
+the test harness detects and ignores features not
+supported by the local implementation.
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="Reference Implementation Notes">Reference Implementation Notes</A></H3></FONT></FONT></CENTER>
+<P>
+<H4><A name="D: diet libc">D: diet libc</A></H4>
+The
+	<A href="http://www.fefe.de/dietlibc/" target=_top>diet libc</A>
+implementation is currently omitted because it fails all but one
+<STRONG>basic.dat</STRONG>
+test.
+<P>
+<H4><A name="P: PCRE">P: PCRE</A></H4>
+The
+<STRONG>P</STRONG>
+implementation emulates
+<NOBR><A href="http://web.archive.org/~gsf/man/man1/perl.html"><STRONG>perl</STRONG></A>(1)</NOBR>
+and is not X/Open compliant by design.
+The main differences are:
+<UL type=square>
+<LI>
+<STRONG>P</STRONG>
+<EM>leftmost-first</EM>
+matching as opposed to the X/Open
+<EM>leftmost-longest</EM>.
+<LI>
+<STRONG>REG_EXTENDED</STRONG>
+patterns only.
+</UL>
+<P>
+However, the
+<STRONG>P</STRONG>
+package regression tests, and
+<NOBR><A href="http://web.archive.org/~gsf/man/man1/perl.html"><STRONG>perl</STRONG></A>(1)</NOBR>
+features creeping into other implementations,
+make it reasonable to include here.
+<P>
+<P><HR><CENTER><FONT color=red><FONT face=courier><H3><A name="testregex Notes">testregex Notes</A></H3></FONT></FONT></CENTER>
+Extensions to the standard terminology are derived from the AT&amp;T
+implementation, unified under
+<STRONG>&lt;regex.h&gt;</STRONG>
+with these modes:
+<P></P><TABLE border=0 frame=void rules=none width=100%><TBODY><TR><TD>
+<TABLE align=center bgcolor=papayawhip border=1 bordercolor=white cellpadding=2 cellspacing=2 frame=box rules=all >
+<TBODY>
+<TR><TD align=center>MODE&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;FLAGS&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;DESCRIPTION</TD></TR>
+<TR><TD align=right>
+BRE&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;0&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;basic RE</TD></TR>
+<TR><TD align=right>
+ERE&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;REG_EXTENDED&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;egrep RE with perl (...) extensions</TD></TR>
+<TR><TD align=right>
+ARE&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;REG_AUGMENTED&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;ERE with ! negation, &lt;&gt; word boundaries</TD></TR>
+<TR><TD align=right>
+SRE&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;REG_SHELL&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;sh patterns</TD></TR>
+<TR><TD align=right>
+KRE&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;REG_SHELL|REG_AUGMENTED&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;ksh93 patterns: ! @ ( | &amp; ) { }</TD></TR>
+<TR><TD align=right>
+LRE&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;REG_LITERAL&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;fgrep patterns</TD></TR>
+</TBODY></TABLE></TD></TR></TBODY></TABLE>
+<P>
+and a few flags to handle
+<NOBR><A href="http://web.archive.org/~gsf/man/man3/fnmatch.html"><STRONG>fnmatch</STRONG></A>(3):</NOBR>
+<P></P><TABLE border=0 frame=void rules=none width=100%><TBODY><TR><TD>
+<TABLE align=center bgcolor=papayawhip border=1 bordercolor=white cellpadding=2 cellspacing=2 frame=box rules=all >
+<TBODY>
+<TR><TD align=left>regex FLAG&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;fnmatch FLAG</TD></TR>
+<TR><TD align=left>
+REG_SHELL_ESCAPED&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;FNM_NOESCAPE</TD></TR>
+<TR><TD align=left>
+REG_SHELL_PATH&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;FNM_PATHNAME</TD></TR>
+<TR><TD align=left>
+REG_SHELL_DOT&nbsp;&nbsp;</TD><TD align=left>&nbsp;&nbsp;FNM_PERIOD</TD></TR>
+</TBODY></TABLE></TD></TR></TBODY></TABLE>
+<P>
+The original
+<TT>testregex.c</TT>
+was done by Doug McIlroy at Bell Labs.
+The current implementation is maintained by Glenn Fowler <SMALL>&lt;<A href=mailto:gsf@research.att.com>gsf@research.att.com</A>&gt;</SMALL>.
+<P>
+<HR>
+<TABLE border=0 align=center width=96%>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right><A href="mailto:gsf@research.att.com?subject= ../re/testregex.mm mm document">Glenn Fowler</A></TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Information and Software Systems Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>AT&amp;T Labs Research</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>Florham Park NJ</TD>
+</TR>
+<TR>
+<TD align=left></TD>
+<TD align=center></TD>
+<TD align=right>March 22, 2011</TD>
+</TR>
+</TABLE>
+<P>
+
+</TD></TR></TBODY></TABLE>
+
+</BODY>
+</HTML>