1 | # From laura_fairhead@talk21.com Fri May 10 11:24:41 2002
|
---|
2 | # Return-Path: <laura_fairhead@talk21.com>
|
---|
3 | # Received: from localhost (aahz [127.0.0.1])
|
---|
4 | # by skeeve.com (8.11.2/8.11.2) with ESMTP id g4A8OdU01822
|
---|
5 | # for <arnold@localhost>; Fri, 10 May 2002 11:24:40 +0300
|
---|
6 | # Received: from actcom.co.il [192.114.47.1]
|
---|
7 | # by localhost with POP3 (fetchmail-5.7.4)
|
---|
8 | # for arnold@localhost (single-drop); Fri, 10 May 2002 11:24:40 +0300 (IDT)
|
---|
9 | # Received: by actcom.co.il (mbox arobbins)
|
---|
10 | # (with Cubic Circle's cucipop (v1.31 1998/05/13) Fri May 10 11:30:42 2002)
|
---|
11 | # X-From_: laura_fairhead@talk21.com Fri May 10 05:39:57 2002
|
---|
12 | # Received: from lmail.actcom.co.il by actcom.co.il with ESMTP
|
---|
13 | # (8.11.6/actcom-0.2) id g4A2dpw26380 for <arobbins@actcom.co.il>;
|
---|
14 | # Fri, 10 May 2002 05:39:52 +0300 (EET DST)
|
---|
15 | # (rfc931-sender: mail.actcom.co.il [192.114.47.13])
|
---|
16 | # Received: from f7.net (consort.superb.net [209.61.216.22])
|
---|
17 | # by lmail.actcom.co.il (8.11.6/8.11.6) with ESMTP id g4A2dxl10851
|
---|
18 | # for <arobbins@actcom.co.il>; Fri, 10 May 2002 05:39:59 +0300
|
---|
19 | # Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164])
|
---|
20 | # by f7.net (8.11.6/8.11.6) with ESMTP id g4A2dwN11097
|
---|
21 | # for <arnold@skeeve.com>; Thu, 9 May 2002 22:39:58 -0400
|
---|
22 | # Received: from [194.73.242.6] (helo=wmpmta04-app.mail-store.com)
|
---|
23 | # by fencepost.gnu.org with smtp (Exim 3.34 #1 (Debian))
|
---|
24 | # id 1760K4-0001QX-00
|
---|
25 | # for <bug-gawk@gnu.org>; Thu, 09 May 2002 22:39:56 -0400
|
---|
26 | # Received: from wmpmtavirtual ([10.216.84.15])
|
---|
27 | # by wmpmta04-app.mail-store.com
|
---|
28 | # (InterMail vM.5.01.02.00 201-253-122-103-101-20001108) with SMTP
|
---|
29 | # id <20020510023921.EEW24107.wmpmta04-app.mail-store.com@wmpmtavirtual>
|
---|
30 | # for <bug-gawk@gnu.org>; Fri, 10 May 2002 03:39:21 +0100
|
---|
31 | # Received: from 213.1.102.243 by t21web05-lrs ([10.216.84.15]); Fri, 10 May 02 03:38:42 GMT+01:00
|
---|
32 | # X-Mailer: talk21 v1.24 - http://talk21.btopenworld.com
|
---|
33 | # From: laura_fairhead@talk21.com
|
---|
34 | # To: bug-gawk@gnu.org
|
---|
35 | # X-Talk21Ref: none
|
---|
36 | # Date: Fri, 10 May 2002 03:38:42 GMT+01:00
|
---|
37 | # Subject: bug in gawk 3.1.0 regex code
|
---|
38 | # Mime-Version: 1.0
|
---|
39 | # Content-type: multipart/mixed; boundary="--GgOuLpDpIyE--1020998322088--"
|
---|
40 | # Message-Id: <20020510023921.EEW24107.wmpmta04-app.mail-store.com@wmpmtavirtual>
|
---|
41 | # X-SpamBouncer: 1.4 (10/07/01)
|
---|
42 | # X-SBClass: OK
|
---|
43 | # Status: RO
|
---|
44 | #
|
---|
45 | # Multipart Message Boundary - attachment/bodypart follows:
|
---|
46 | #
|
---|
47 | #
|
---|
48 | # ----GgOuLpDpIyE--1020998322088--
|
---|
49 | # Content-Type: text/plain
|
---|
50 | # Content-Transfer-Encoding: 7bit
|
---|
51 | #
|
---|
52 | #
|
---|
53 | # I believe I've just found a bug in gawk3.1.0 implementation of
|
---|
54 | # extended regular expressions. It seems to be down to the alternation
|
---|
55 | # operator; when using an end anchor '$' as a subexpression in an
|
---|
56 | # alternation and the entire matched RE is a nul-string it fails
|
---|
57 | # to match the end of string, for example;
|
---|
58 | #
|
---|
59 | # gsub(/$|2/,"x")
|
---|
60 | # print
|
---|
61 | #
|
---|
62 | # input = 12345
|
---|
63 | # expected output = 1x345x
|
---|
64 | # actual output = 1x345
|
---|
65 | #
|
---|
66 | # The start anchor '^' always works as expected;
|
---|
67 | #
|
---|
68 | # gsub(/^|2/,"x")
|
---|
69 | # print
|
---|
70 | #
|
---|
71 | # input = 12345
|
---|
72 | # expected output = x1x345
|
---|
73 | # actual output = x1x345
|
---|
74 | #
|
---|
75 | # This was with POSIX compliance enabled although that doesn't
|
---|
76 | # affect the result.
|
---|
77 | #
|
---|
78 | # I checked on gawk3.0.6 and got exactly the same results however
|
---|
79 | # gawk2.15.6 gives the expected results.
|
---|
80 | #
|
---|
81 | # All the following platforms produced the same results;
|
---|
82 | #
|
---|
83 | # gawk3.0.6 / Win98 / i386
|
---|
84 | # gawk3.1.0 / Win98 / i386
|
---|
85 | # gawk3.0.5 / Linux2.2.16 / i386
|
---|
86 | #
|
---|
87 | # Complete test results were as follows;
|
---|
88 | #
|
---|
89 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
90 | # regex input expected actual bug?
|
---|
91 | # -------------------------------------------------------------
|
---|
92 | # (^) 12345 x12345 x12345
|
---|
93 | # ($) 12345 12345x 12345x
|
---|
94 | # (^)|($) 12345 x12345x x12345x
|
---|
95 | # ($)|(^) 12345 x12345x x12345x
|
---|
96 | # 2 12345 1x345 1x345
|
---|
97 | # (^)|2 12345 x1x345 x1x345
|
---|
98 | # 2|(^) 12345 x1x345 x1x345
|
---|
99 | # ($)|2 12345 1x345x 1x345 **BUG**
|
---|
100 | # 2|($) 12345 1x345x 1x345 **BUG**
|
---|
101 | # (2)|(^) 12345 x1x345 x1x345
|
---|
102 | # (^)|(2) 12345 x1x345 x1x345
|
---|
103 | # (2)|($) 12345 1x345x 1x345 **BUG**
|
---|
104 | # ($)|(2) 12345 1x345x 1x345 **BUG**
|
---|
105 | # ((2)|(^)). 12345 xx45 xx45
|
---|
106 | # ((^)|(2)). 12345 xx45 xx45
|
---|
107 | # .((2)|($)) 12345 x34x x34x
|
---|
108 | # .(($)|(2)) 12345 x34x x34x
|
---|
109 | # (^)|6 12345 x12345 x12345
|
---|
110 | # 6|(^) 12345 x12345 x12345
|
---|
111 | # ($)|6 12345 12345x 12345x
|
---|
112 | # 6|($) 12345 12345x 12345x
|
---|
113 | # 2|6|(^) 12345 x1x345 x1x345
|
---|
114 | # 2|(^)|6 12345 x1x345 x1x345
|
---|
115 | # 6|2|(^) 12345 x1x345 x1x345
|
---|
116 | # 6|(^)|2 12345 x1x345 x1x345
|
---|
117 | # (^)|6|2 12345 x1x345 x1x345
|
---|
118 | # (^)|2|6 12345 x1x345 x1x345
|
---|
119 | # 2|6|($) 12345 1x345x 1x345 **BUG**
|
---|
120 | # 2|($)|6 12345 1x345x 1x345 **BUG**
|
---|
121 | # 6|2|($) 12345 1x345x 1x345 **BUG**
|
---|
122 | # 6|($)|2 12345 1x345x 1x345 **BUG**
|
---|
123 | # ($)|6|2 12345 1x345x 1x345 **BUG**
|
---|
124 | # ($)|2|6 12345 1x345x 1x345 **BUG**
|
---|
125 | # 2|4|(^) 12345 x1x3x5 x1x3x5
|
---|
126 | # 2|(^)|4 12345 x1x3x5 x1x3x5
|
---|
127 | # 4|2|(^) 12345 x1x3x5 x1x3x5
|
---|
128 | # 4|(^)|2 12345 x1x3x5 x1x3x5
|
---|
129 | # (^)|4|2 12345 x1x3x5 x1x3x5
|
---|
130 | # (^)|2|4 12345 x1x3x5 x1x3x5
|
---|
131 | # 2|4|($) 12345 1x3x5x 1x3x5 **BUG**
|
---|
132 | # 2|($)|4 12345 1x3x5x 1x3x5 **BUG**
|
---|
133 | # 4|2|($) 12345 1x3x5x 1x3x5 **BUG**
|
---|
134 | # 4|($)|2 12345 1x3x5x 1x3x5 **BUG**
|
---|
135 | # ($)|4|2 12345 1x3x5x 1x3x5 **BUG**
|
---|
136 | # ($)|2|4 12345 1x3x5x 1x3x5 **BUG**
|
---|
137 | # x{0}((2)|(^)) 12345 x1x345 x1x345
|
---|
138 | # x{0}((^)|(2)) 12345 x1x345 x1x345
|
---|
139 | # x{0}((2)|($)) 12345 1x345x 1x345 **BUG**
|
---|
140 | # x{0}(($)|(2)) 12345 1x345x 1x345 **BUG**
|
---|
141 | # x*((2)|(^)) 12345 x1x345 x1x345
|
---|
142 | # x*((^)|(2)) 12345 x1x345 x1x345
|
---|
143 | # x*((2)|($)) 12345 1x345x 1x345 **BUG**
|
---|
144 | # x*(($)|(2)) 12345 1x345x 1x345 **BUG**
|
---|
145 | # x{0}^ 12345 x12345 x12345
|
---|
146 | # x{0}$ 12345 12345x 12345x
|
---|
147 | # (x{0}^)|2 12345 x1x345 x1x345
|
---|
148 | # (x{0}$)|2 12345 1x345x 1x345 **BUG**
|
---|
149 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
150 | #
|
---|
151 | #
|
---|
152 | # Here's the test program I used, a few of the cases use ERE {n[,[m]]}
|
---|
153 | # operators so need '-W posix', (although the same results minus
|
---|
154 | # those tests came out without POSIX compliance enabled)
|
---|
155 | #
|
---|
156 | # [ Invocation was 'gawk -W posix -f tregex.awk' ]
|
---|
157 | #
|
---|
158 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
159 | # tregex.awk
|
---|
160 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
# Driver for the regex alternation/anchor test cases described above.
#
# Reads the data file named by the first command-line argument.  Each
# record supplies three whitespace-separated fields:
#	$1  the regular expression (used as a dynamic regex)
#	$2  the input string
#	$3  the expected result of gsub(regex, "x") applied to the input
# One report row is printed per record; the last column holds "**BUG**"
# when the actual result differs from the expected one.
BEGIN {
	row_fmt = "%-20s%-10s%-10s%-10s%-10s\n"

	# The heading keeps its trailing newline, so print (which appends
	# ORS) leaves an empty line after it.
	heading = sprintf(row_fmt, "regex", "input", "expected", "actual", "bug?")
	print heading

	# Build the rule line: assigning to a field one past length(heading)
	# creates that many empty fields, and rebuilding $0 joins them with
	# OFS, giving a run of length(heading) dashes.
	OFS = "-"
	$(length(heading) + 1) = ""
	print $0

	datafile = ARGV[1]	# ADR: was testre.dat

	# getline returns 1 for a record, 0 at end of file and -1 on error.
	# Testing "> 0" stops on both 0 and -1; a bare "while (getline < file)"
	# would loop forever on -1 (e.g. the file is missing or unreadable).
	while ((status = (getline < datafile)) > 0) {
		RE = $1
		IN = $2
		OUT = $3

		# Replace the record with the input string so that the
		# two-argument gsub() operates on it via $0.
		$0 = IN
		gsub(RE, "x")

		verdict = ($0 == OUT) ? "" : "**BUG**"
		printf row_fmt, RE, IN, OUT, $0, verdict
	}

	if (status < 0) {
		printf "cannot read test data file \"%s\"\n", datafile > "/dev/stderr"
		exit 1
	}
	close(datafile)
}
|
---|
175 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
176 | #
|
---|
177 | # This is the test data file used;
|
---|
178 | #
|
---|
179 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
180 | # testre.dat
|
---|
181 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
182 | # (^) 12345 x12345
|
---|
183 | # ($) 12345 12345x
|
---|
184 | # (^)|($) 12345 x12345x
|
---|
185 | # ($)|(^) 12345 x12345x
|
---|
186 | # 2 12345 1x345
|
---|
187 | # (^)|2 12345 x1x345
|
---|
188 | # 2|(^) 12345 x1x345
|
---|
189 | # ($)|2 12345 1x345x
|
---|
190 | # 2|($) 12345 1x345x
|
---|
191 | # (2)|(^) 12345 x1x345
|
---|
192 | # (^)|(2) 12345 x1x345
|
---|
193 | # (2)|($) 12345 1x345x
|
---|
194 | # ($)|(2) 12345 1x345x
|
---|
195 | # ((2)|(^)). 12345 xx45
|
---|
196 | # ((^)|(2)). 12345 xx45
|
---|
197 | # .((2)|($)) 12345 x34x
|
---|
198 | # .(($)|(2)) 12345 x34x
|
---|
199 | # (^)|6 12345 x12345
|
---|
200 | # 6|(^) 12345 x12345
|
---|
201 | # ($)|6 12345 12345x
|
---|
202 | # 6|($) 12345 12345x
|
---|
203 | # 2|6|(^) 12345 x1x345
|
---|
204 | # 2|(^)|6 12345 x1x345
|
---|
205 | # 6|2|(^) 12345 x1x345
|
---|
206 | # 6|(^)|2 12345 x1x345
|
---|
207 | # (^)|6|2 12345 x1x345
|
---|
208 | # (^)|2|6 12345 x1x345
|
---|
209 | # 2|6|($) 12345 1x345x
|
---|
210 | # 2|($)|6 12345 1x345x
|
---|
211 | # 6|2|($) 12345 1x345x
|
---|
212 | # 6|($)|2 12345 1x345x
|
---|
213 | # ($)|6|2 12345 1x345x
|
---|
214 | # ($)|2|6 12345 1x345x
|
---|
215 | # 2|4|(^) 12345 x1x3x5
|
---|
216 | # 2|(^)|4 12345 x1x3x5
|
---|
217 | # 4|2|(^) 12345 x1x3x5
|
---|
218 | # 4|(^)|2 12345 x1x3x5
|
---|
219 | # (^)|4|2 12345 x1x3x5
|
---|
220 | # (^)|2|4 12345 x1x3x5
|
---|
221 | # 2|4|($) 12345 1x3x5x
|
---|
222 | # 2|($)|4 12345 1x3x5x
|
---|
223 | # 4|2|($) 12345 1x3x5x
|
---|
224 | # 4|($)|2 12345 1x3x5x
|
---|
225 | # ($)|4|2 12345 1x3x5x
|
---|
226 | # ($)|2|4 12345 1x3x5x
|
---|
227 | # x{0}((2)|(^)) 12345 x1x345
|
---|
228 | # x{0}((^)|(2)) 12345 x1x345
|
---|
229 | # x{0}((2)|($)) 12345 1x345x
|
---|
230 | # x{0}(($)|(2)) 12345 1x345x
|
---|
231 | # x*((2)|(^)) 12345 x1x345
|
---|
232 | # x*((^)|(2)) 12345 x1x345
|
---|
233 | # x*((2)|($)) 12345 1x345x
|
---|
234 | # x*(($)|(2)) 12345 1x345x
|
---|
235 | # x{0}^ 12345 x12345
|
---|
236 | # x{0}$ 12345 12345x
|
---|
237 | # (x{0}^)|2 12345 x1x345
|
---|
238 | # (x{0}$)|2 12345 1x345x
|
---|
239 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
---|
240 | #
|
---|
241 | # I've attached a full copy of this e-mail in ZIP format
|
---|
242 | # in case of e-mail transport errors corrupting the data.
|
---|
243 | #
|
---|
244 | # I've posted the same bug report to gnu.utils.bug and
|
---|
245 | # it's being discussed in this thread on comp.lang.awk;
|
---|
246 | #
|
---|
247 | # From: laura@madonnaweb.com (laura fairhead)
|
---|
248 | # Newsgroups: comp.lang.awk
|
---|
249 | # Subject: bug in gawk3.1.0 regex code
|
---|
250 | # Date: Wed, 08 May 2002 23:31:40 GMT
|
---|
251 | # Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE>
|
---|
252 | #
|
---|
253 | #
|
---|
254 | # byefrom
|
---|
255 | #
|
---|
256 | # Laura Fairhead
|
---|
257 | #
|
---|
258 | #
|
---|
259 | #
|
---|
260 | #
|
---|
261 | # --------------------
|
---|
262 | # talk21 your FREE portable and private address on the net at http://www.talk21.com
|
---|
263 | # ----GgOuLpDpIyE--1020998322088--
|
---|
264 | # Content-Type: : application/zip;; Name="COPY.ZIP"
|
---|
265 | # Content-Transfer-Encoding: base64
|
---|
266 | # Content-Disposition: attachment; filename="COPY.ZIP"
|
---|
267 | #
|
---|
268 | # UEsDBBQAAAAIALoaqiyj8d/bjwMAAKsaAAADAAAARklMrVjfa+JAEH4P5H8ISwrRU9EYfbheKBR6
|
---|
269 | # xRcLvevbYbFtzsqJlBrpQr3722+zMWZ31pk1MaG0Q/m+nR87O9kvruM6/5p4XOc9WSTc05/l
|
---|
270 | # +m2bSivhb8lzmrx43vw53c5X2f+etourHOc63XMe1wlmLQ8+g3AYjaTFD2ZplY9g+xRbWly3
|
---|
271 | # NPastYMrQN9cs4DvHYz+dHbomY8SOTctGDlcQfXND1Uz6cK3EXcVdpY37ltSuB55u339cNtu
|
---|
272 | # F76NPTudHYR0zS2RZ/sd1maHVLdYI/cp31b2PvFW72jkvIi2tLTI94nXY/eCfeZK8Ap7GO1b
|
---|
273 | # u7QAO8+8FjsLfFx7OowtfW6dLYRv22wZ031uYYc7M/aK5xvEfjp7vDPnQxW2OZuqndDxWeyw
|
---|
274 | # dt6y5rXPt5xrqG8bW9a8tm8ZN1q1UyYTXvNT2HjN7VWLLL3GR7pl9nlUkx1Z+5xm2/qcYsu4
|
---|
275 | # z2KHtfOWNad6jR92jGN9jvm2sSNbn1vYlj4n2TLus9h4zW1s/tn/e3iHV55MOXumvUarsvVX
|
---|
276 | # +OknNGfrr/AK7DbMulLkbZh1VTa8uFSLHF5cqlVt5tW9eWRsH2VbVY10rp+TCu9Q6Rxj2/Ju
|
---|
277 | # SJE2KG5TqW57848/jS15fXM7mX66ztv7cp16j/FGGr8DdtEN+5uL7sD49WvNOkwGIv5KaS3+
|
---|
278 | # FsJamLmyFkYmrFnLde6+/4hZl7mOH6yS9SJ9DR5bXwatmLHCrd/PivTxulwlwSJJV8t14n1j
|
---|
279 | # abIRCfde5mm2iojx/ib2B5eTaeyHl3cPP2N/KNbsx5Op6yw226fg/qbDeIbNc/DoHAR6Mu2I
|
---|
280 | # dTp+X/zEsTCvGPvK9j0govsrfxqqdJN9cKhMY0vilwdPOebmRwqIy4+x+Tni+Hrc/PKAAnGZ
|
---|
281 | # 7pXH2fyaYK6X4+B9CcPBt/RRt9z8FoDhoOpH/QJ9j+KAkkf9As2O4oA6N/xy6RWo8OMoqLYN
|
---|
282 | # 1DDipqo+joIqEGtQqDWJRibXK9oO6igMB1Uu2XeKZwwHlSuO0zue6idVGVE4VQPheeiVIc8F
|
---|
283 | # sV6Bg6oRx+knkup3Kl8VR+Vb5qGru2N14SNTx2E4qNhwnH1/+chUYRROvfvjeejK6khdeLm/
|
---|
284 | # +HoFDqolHGfdX17sG5WviqPyLXBQ1WB9D/ULjSvHH9ZXUJOgOKA+UL9AZ1A4dThTftXxTOWh
|
---|
285 | # qgRs7kI9gF4gwM0fnVfgjo/F19A96T9QSwECFAAUAAAACAC6Gqoso/Hf248DAACrGgAAAwAA
|
---|
286 | # AAAAAAABACAAAAAAAAAARklMUEsFBgAAAAABAAEAMQAAALADAAAAAA==
|
---|
287 | # ----GgOuLpDpIyE--1020998322088----
|
---|
288 | #
|
---|
289 | #
|
---|
290 | #
|
---|