source: trunk/essentials/sys-apps/gawk/test/gsubtst4.awk

Last change on this file was 3076, checked in by bird, 18 years ago

gawk 3.1.5

File size: 5.5 KB
Line 
1# From arnold Thu May 9 17:27:03 2002
2# Return-Path: <arnold@skeeve.com>
3# Received: (from arnold@localhost)
4# by skeeve.com (8.11.6/8.11.6) id g49ER3K27925
5# for arnold; Thu, 9 May 2002 17:27:03 +0300
6# Date: Thu, 9 May 2002 17:27:03 +0300
7# From: Aharon Robbins <arnold@skeeve.com>
8# Message-Id: <200205091427.g49ER3K27925@skeeve.com>
9# To: arnold@skeeve.com
10# Subject: fixme
11# X-SpamBouncer: 1.4 (10/07/01)
12# X-SBRule: Pattern Match (Other Patterns) (Score: 4850)
13# X-SBRule: Pattern Match (Spam Phone #) (Score: 0)
14# X-SBClass: Blocked
15# Status: RO
16#
17# Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail
18# From: laura@madonnaweb.com (laura fairhead)
19# Newsgroups: comp.lang.awk
20# Subject: bug in gawk3.1.0 regex code
21# Date: Wed, 08 May 2002 23:31:40 GMT
22# Organization: that'll be the daewooo :)
23# Lines: 211
24# Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE>
25# Reply-To: laura@madonnaweb.com
26# NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48)
27# X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286])
28# X-Newsreader: Forte Free Agent 1.21/32.243
29# Xref: dfw-artgen comp.lang.awk:13059
30#
31#
32# I believe I've just found a bug in gawk3.1.0 implementation of
33# extended regular expressions. It seems to be down to the alternation
34# operator; when using an end anchor '$' as a subexpression in an
35# alternation and the entire matched RE is a null string it fails
36# to match the end of string, for example;
37#
38# gsub(/$|2/,"x")
39# print
40#
41# input = 12345
42# expected output = 1x345x
43# actual output = 1x345
44#
45# The start anchor '^' always works as expected;
46#
47# gsub(/^|2/,"x")
48# print
49#
50# input = 12345
51# expected output = x1x345
52# actual output = x1x345
53#
54# This was with POSIX compliance enabled although that doesn't
55# affect the result.
56#
57# I checked on gawk3.0.6 and got exactly the same results however
58# gawk2.15.6 gives the expected results.
59#
60# I'm about to post a bug report about this into gnu.utils.bug
61# but I thought I'd post it here first in case anyone has
62# any input/comments/whatever ....
63#
64# Complete test results were as follows;
65#
66# input 12345
67# output gsub(/regex/,"x",input)
68#
69# regex output
70# (^) x12345
71# ($) 12345x
72# (^)|($) x12345x
73# ($)|(^) x12345x
74# (2) 1x345
75# (^)|2 x1x345
76# 2|(^) x1x345
77# ($)|2 1x345
78# 2|($) 1x345
79# (2)|(^) x1x345
80# (^)|(2) x1x345
81# (2)|($) 1x345
82# ($)|(2) 1x345
83# .((2)|(^)) x345
84# .((^)|(2)) x345
85# .((2)|($)) x34x
86# .(($)|(2)) x34x
87# x{0}((2)|(^)) x1x345
88# x{0}((^)|(2)) x1x345
89# x{0}((2)|($)) 1x345
90# x{0}(($)|(2)) 1x345
91# x*((2)|(^)) x1x345
92# x*((^)|(2)) x1x345
93# x*((2)|($)) 1x345
94# x*(($)|(2)) 1x345
95#
96# Here's the test program I used, a few of the cases use ERE {n[,[m]]}
97# operators so that will have to be commented out or have a check
98# added or something (should have put a conditional in I know... ;-)
99#
100# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101#
# Test driver. Every case follows the same three steps: reset $0 to TESTSTR,
# call gsub(regex, "x") (with no third argument gsub operates on $0), then
# print a text label for the regex followed by the modified $0.
102BEGIN{
103
# Input string that each case substitutes into.
104TESTSTR="12345"
105
# Header lines: echo the input and the form of the call being exercised.
106print "input "TESTSTR
107print "output gsub(/regex/,\"x\",input)"
108print ""
109
110print "regex output"
# Anchors alone, then alternations of the two anchors in both orders.
111$0=TESTSTR
112gsub(/(^)/,"x")
113print "(^) "$0
114
115$0=TESTSTR
116gsub(/($)/,"x")
117print "($) "$0
118
119$0=TESTSTR
120gsub(/(^)|($)/,"x")
121print "(^)|($) "$0
122
123$0=TESTSTR
124gsub(/($)|(^)/,"x")
125print "($)|(^) "$0
126
# Note: the label printed for this case reads "(2)" although the regex
# literal is /2/ without parentheses.
127$0=TESTSTR
128gsub(/2/,"x")
129print "(2) "$0
130
# Alternations of an anchor with the literal "2", in both operand orders,
# with and without parentheses around the literal.
131$0=TESTSTR
132gsub(/(^)|2/,"x")
133print "(^)|2 "$0
134
135$0=TESTSTR
136gsub(/2|(^)/,"x")
137print "2|(^) "$0
138
139$0=TESTSTR
140gsub(/($)|2/,"x")
141print "($)|2 "$0
142
143$0=TESTSTR
144gsub(/2|($)/,"x")
145print "2|($) "$0
146
147$0=TESTSTR
148gsub(/(2)|(^)/,"x")
149print "(2)|(^) "$0
150
151$0=TESTSTR
152gsub(/(^)|(2)/,"x")
153print "(^)|(2) "$0
154
155$0=TESTSTR
156gsub(/(2)|($)/,"x")
157print "(2)|($) "$0
158
159$0=TESTSTR
160gsub(/($)|(2)/,"x")
161print "($)|(2) "$0
162
# Same alternations preceded by ".", so each match consumes one character.
163$0=TESTSTR
164gsub(/.((2)|(^))/,"x")
165print ".((2)|(^)) "$0
166
167$0=TESTSTR
168gsub(/.((^)|(2))/,"x")
169print ".((^)|(2)) "$0
170
171$0=TESTSTR
172gsub(/.((2)|($))/,"x")
173print ".((2)|($)) "$0
174
175$0=TESTSTR
176gsub(/.(($)|(2))/,"x")
177print ".(($)|(2)) "$0
178
# Same alternations preceded by x{0}. These cases use the ERE interval
# operator that the header comment says may need to be commented out.
# NOTE(review): presumably interval expressions must be enabled in the awk
# under test for these to be meaningful -- confirm against the test harness.
179$0=TESTSTR
180gsub(/x{0}((2)|(^))/,"x")
181print "x{0}((2)|(^)) "$0
182
183$0=TESTSTR
184gsub(/x{0}((^)|(2))/,"x")
185print "x{0}((^)|(2)) "$0
186
187$0=TESTSTR
188gsub(/x{0}((2)|($))/,"x")
189print "x{0}((2)|($)) "$0
190
191$0=TESTSTR
192gsub(/x{0}(($)|(2))/,"x")
193print "x{0}(($)|(2)) "$0
194
# Same alternations preceded by x*, another prefix that can match empty.
195$0=TESTSTR
196gsub(/x*((2)|(^))/,"x")
197print "x*((2)|(^)) "$0
198
199$0=TESTSTR
200gsub(/x*((^)|(2))/,"x")
201print "x*((^)|(2)) "$0
202
203$0=TESTSTR
204gsub(/x*((2)|($))/,"x")
205print "x*((2)|($)) "$0
206
207$0=TESTSTR
208gsub(/x*(($)|(2))/,"x")
209print "x*(($)|(2)) "$0
210
# The four cases below do not appear in the results table in the header
# comment; they apply x{0} directly to an anchor, alone and alternated with "2".
211$0=TESTSTR
212gsub(/x{0}^/,"x")
213print "x{0}^ "$0
214
215$0=TESTSTR
216gsub(/x{0}$/,"x")
217print "x{0}$ "$0
218
219$0=TESTSTR
220gsub(/(x{0}^)|2/,"x")
221print "(x{0}^)|2 "$0
222
223$0=TESTSTR
224gsub(/(x{0}$)|2/,"x")
225print "(x{0}$)|2 "$0
226
227
228}
229#
230# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
231#
232# byefrom
233#
234# --
235# laura fairhead # laura@madonnaweb.com http://lf.8k.com
236# # if you are bored crack my sig.
237# 1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
238# EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
239# 630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
240# 8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
241# 80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492
242#
Note: See TracBrowser for help on using the repository browser.