1 | #From arnold Thu May 9 17:27:03 2002
|
---|
2 | #Return-Path: <arnold@skeeve.com>
|
---|
3 | #Received: (from arnold@localhost)
|
---|
4 | # by skeeve.com (8.11.6/8.11.6) id g49ER3K27925
|
---|
5 | # for arnold; Thu, 9 May 2002 17:27:03 +0300
|
---|
6 | #Date: Thu, 9 May 2002 17:27:03 +0300
|
---|
7 | #From: Aharon Robbins <arnold@skeeve.com>
|
---|
8 | #Message-Id: <200205091427.g49ER3K27925@skeeve.com>
|
---|
9 | #To: arnold@skeeve.com
|
---|
10 | #Subject: fixme
|
---|
11 | #X-SpamBouncer: 1.4 (10/07/01)
|
---|
12 | #X-SBRule: Pattern Match (Other Patterns) (Score: 4850)
|
---|
13 | #X-SBRule: Pattern Match (Spam Phone #) (Score: 0)
|
---|
14 | #X-SBClass: Blocked
|
---|
15 | #Status: O
|
---|
16 | #
|
---|
17 | #Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail
|
---|
18 | #From: laura@madonnaweb.com (laura fairhead)
|
---|
19 | #Newsgroups: comp.lang.awk
|
---|
20 | #Subject: bug in gawk3.1.0 regex code
|
---|
21 | #Date: Wed, 08 May 2002 23:31:40 GMT
|
---|
22 | #Organization: that'll be the daewooo :)
|
---|
23 | #Lines: 211
|
---|
24 | #Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE>
|
---|
25 | #Reply-To: laura@madonnaweb.com
|
---|
26 | #NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48)
|
---|
27 | #X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286])
|
---|
28 | #X-Newsreader: Forte Free Agent 1.21/32.243
|
---|
29 | #Xref: dfw-artgen comp.lang.awk:13059
|
---|
30 | #
|
---|
31 | #
|
---|
32 | #I believe I've just found a bug in gawk3.1.0 implementation of
|
---|
33 | #extended regular expressions. It seems to be down to the alternation
|
---|
34 | #operator; when using an end anchor '$' as a subexpression in an
|
---|
35 | #alternation and the entire matched RE is a nul-string it fails
|
---|
36 | #to match the end of string, for example;
|
---|
37 | #
|
---|
38 | #gsub(/$|2/,"x")
|
---|
39 | #print
|
---|
40 | #
|
---|
41 | #input = 12345
|
---|
42 | #expected output = 1x345x
|
---|
43 | #actual output = 1x345
|
---|
44 | #
|
---|
45 | #The start anchor '^' always works as expected;
|
---|
46 | #
|
---|
47 | #gsub(/^|2/,"x")
|
---|
48 | #print
|
---|
49 | #
|
---|
50 | #input = 12345
|
---|
51 | #expected output = x1x345
|
---|
52 | #actual output = x1x345
|
---|
53 | #
|
---|
54 | #This was with POSIX compliance enabled although that doesn't
|
---|
55 | #affect the result.
|
---|
56 | #
|
---|
57 | #I checked on gawk3.0.6 and got exactly the same results however
|
---|
58 | #gawk2.15.6 gives the expected results.
|
---|
59 | #
|
---|
60 | #I'm about to post a bug report about this into gnu.utils.bug
|
---|
61 | #but I thought I'd post it here first in case anyone has
|
---|
62 | #any input/comments/whatever ....
|
---|
63 | #
|
---|
64 | #Complete test results were as follows;
|
---|
65 | #
|
---|
66 | #input 12345
|
---|
67 | #output gsub(/regex/,"x",input)
|
---|
68 | #
|
---|
69 | #regex output
|
---|
70 | #(^) x12345
|
---|
71 | #($) 12345x
|
---|
72 | #(^)|($) x12345x
|
---|
73 | #($)|(^) x12345x
|
---|
74 | #(2) 1x345
|
---|
75 | #(^)|2 x1x345
|
---|
76 | #2|(^) x1x345
|
---|
77 | #($)|2 1x345
|
---|
78 | #2|($) 1x345
|
---|
79 | #(2)|(^) x1x345
|
---|
80 | #(^)|(2) x1x345
|
---|
81 | #(2)|($) 1x345
|
---|
82 | #($)|(2) 1x345
|
---|
83 | #.((2)|(^)) x345
|
---|
84 | #.((^)|(2)) x345
|
---|
85 | #.((2)|($)) x34x
|
---|
86 | #.(($)|(2)) x34x
|
---|
87 | #x{0}((2)|(^)) x1x345
|
---|
88 | #x{0}((^)|(2)) x1x345
|
---|
89 | #x{0}((2)|($)) 1x345
|
---|
90 | #x{0}(($)|(2)) 1x345
|
---|
91 | #x*((2)|(^)) x1x345
|
---|
92 | #x*((^)|(2)) x1x345
|
---|
93 | #x*((2)|($)) 1x345
|
---|
94 | #x*(($)|(2)) 1x345
|
---|
95 | #
|
---|
96 | #Here's the test program I used, a few of the cases use ERE {n[,[m]]}
|
---|
97 | #operators so that will have to be commented out or have a check
|
---|
98 | #added or something (should have put a conditional in I know... ;-)
|
---|
99 | #
|
---|
100 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
101 | #
|
---|
# Test driver for anchors ('^' and '$') used as alternatives in an
# alternation, as described in the bug report quoted above.
#
# Every case follows the same three steps:
#   1. reset $0 to TESTSTR,
#   2. run gsub() on $0 with a regex constant and the replacement "x",
#   3. print the regex (as a label) followed by the resulting $0.
#
# The label printed for each case is the regex that was applied.  The
# plain-literal case is written as /(2)/ so that it agrees with its
# printed label "(2)"; the other cases already matched their labels.
BEGIN {
    TESTSTR = "12345"

    # Header lines of the report.
    print "input "TESTSTR
    print "output gsub(/regex/,\"x\",input)"
    print ""

    print "regex output"

    # --- Anchors on their own and alternated with each other ---
    $0 = TESTSTR
    gsub(/(^)/, "x")
    print "(^) "$0

    $0 = TESTSTR
    gsub(/($)/, "x")
    print "($) "$0

    $0 = TESTSTR
    gsub(/(^)|($)/, "x")
    print "(^)|($) "$0

    $0 = TESTSTR
    gsub(/($)|(^)/, "x")
    print "($)|(^) "$0

    # --- Literal on its own (grouped, to match the printed label) ---
    $0 = TESTSTR
    gsub(/(2)/, "x")
    print "(2) "$0

    # --- Anchor alternated with an ungrouped literal ---
    $0 = TESTSTR
    gsub(/(^)|2/, "x")
    print "(^)|2 "$0

    $0 = TESTSTR
    gsub(/2|(^)/, "x")
    print "2|(^) "$0

    $0 = TESTSTR
    gsub(/($)|2/, "x")
    print "($)|2 "$0

    $0 = TESTSTR
    gsub(/2|($)/, "x")
    print "2|($) "$0

    # --- Anchor alternated with a grouped literal ---
    $0 = TESTSTR
    gsub(/(2)|(^)/, "x")
    print "(2)|(^) "$0

    $0 = TESTSTR
    gsub(/(^)|(2)/, "x")
    print "(^)|(2) "$0

    $0 = TESTSTR
    gsub(/(2)|($)/, "x")
    print "(2)|($) "$0

    $0 = TESTSTR
    gsub(/($)|(2)/, "x")
    print "($)|(2) "$0

    # --- Alternation preceded by '.' (any single character) ---
    $0 = TESTSTR
    gsub(/.((2)|(^))/, "x")
    print ".((2)|(^)) "$0

    $0 = TESTSTR
    gsub(/.((^)|(2))/, "x")
    print ".((^)|(2)) "$0

    $0 = TESTSTR
    gsub(/.((2)|($))/, "x")
    print ".((2)|($)) "$0

    $0 = TESTSTR
    gsub(/.(($)|(2))/, "x")
    print ".(($)|(2)) "$0

    # --- Alternation preceded by x{0} ---
    # Disabled: these use the ERE interval operator {n}, which the report
    # says must be commented out (or guarded by a check).
#    $0 = TESTSTR
#    gsub(/x{0}((2)|(^))/,"x")
#    print "x{0}((2)|(^)) "$0
#
#    $0 = TESTSTR
#    gsub(/x{0}((^)|(2))/,"x")
#    print "x{0}((^)|(2)) "$0
#
#    $0 = TESTSTR
#    gsub(/x{0}((2)|($))/,"x")
#    print "x{0}((2)|($)) "$0
#
#    $0 = TESTSTR
#    gsub(/x{0}(($)|(2))/,"x")
#    print "x{0}(($)|(2)) "$0

    # --- Alternation preceded by x* ---
    $0 = TESTSTR
    gsub(/x*((2)|(^))/, "x")
    print "x*((2)|(^)) "$0

    $0 = TESTSTR
    gsub(/x*((^)|(2))/, "x")
    print "x*((^)|(2)) "$0

    $0 = TESTSTR
    gsub(/x*((2)|($))/, "x")
    print "x*((2)|($)) "$0

    $0 = TESTSTR
    gsub(/x*(($)|(2))/, "x")
    print "x*(($)|(2)) "$0

    # --- x{0} directly before an anchor ---
    # Disabled for the same reason as above: they use the interval operator.
#    $0 = TESTSTR
#    gsub(/x{0}^/,"x")
#    print "x{0}^ "$0
#
#    $0 = TESTSTR
#    gsub(/x{0}$/,"x")
#    print "x{0}$ "$0
#
#    $0 = TESTSTR
#    gsub(/(x{0}^)|2/,"x")
#    print "(x{0}^)|2 "$0
#
#    $0 = TESTSTR
#    gsub(/(x{0}$)|2/,"x")
#    print "(x{0}$)|2 "$0
}
|
---|
229 | #
|
---|
230 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
231 | #
|
---|
232 | #byefrom
|
---|
233 | #
|
---|
234 | #--
|
---|
235 | #laura fairhead # laura@madonnaweb.com http://lf.8k.com
|
---|
236 | # # if you are bored crack my sig.
|
---|
237 | #1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
|
---|
238 | #EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
|
---|
239 | #630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
|
---|
240 | #8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
|
---|
241 | #80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492
|
---|