1 | # From hankedr@dms.auburn.edu Sun Jan 28 12:25:43 2001
|
---|
2 | # Received: from mail.actcom.co.il [192.114.47.13]
|
---|
3 | # by localhost with POP3 (fetchmail-5.5.0)
|
---|
4 | # for arnold@localhost (single-drop); Sun, 28 Jan 2001 12:25:43 +0200 (IST)
|
---|
5 | # Received: by actcom.co.il (mbox arobbins)
|
---|
6 | # (with Cubic Circle's cucipop (v1.31 1998/05/13) Sun Jan 28 12:27:08 2001)
|
---|
7 | # X-From_: hankedr@dms.auburn.edu Sat Jan 27 15:15:57 2001
|
---|
8 | # Received: from lmail.actcom.co.il by actcom.co.il with ESMTP
|
---|
9 | # (8.9.1a/actcom-0.2) id PAA23801 for <arobbins@actcom.co.il>;
|
---|
10 | # Sat, 27 Jan 2001 15:15:55 +0200 (EET)
|
---|
11 | # (rfc931-sender: lmail.actcom.co.il [192.114.47.13])
|
---|
12 | # Received: from billohost.com (www.billohost.com [209.196.35.10])
|
---|
13 | # by lmail.actcom.co.il (8.9.3/8.9.1) with ESMTP id PAA15998
|
---|
14 | # for <arobbins@actcom.co.il>; Sat, 27 Jan 2001 15:16:27 +0200
|
---|
15 | # Received: from yak.dms.auburn.edu (yak.dms.auburn.edu [131.204.53.2])
|
---|
16 | # by billohost.com (8.9.3/8.9.3) with ESMTP id IAA00467
|
---|
17 | # for <arnold@skeeve.com>; Sat, 27 Jan 2001 08:15:52 -0500
|
---|
18 | # Received: (from hankedr@localhost)
|
---|
19 | # by yak.dms.auburn.edu (8.9.3/8.9.3/Debian/GNU) id HAA24441;
|
---|
20 | # Sat, 27 Jan 2001 07:15:44 -0600
|
---|
21 | # Date: Sat, 27 Jan 2001 07:15:44 -0600
|
---|
22 | # Message-Id: <200101271315.HAA24441@yak.dms.auburn.edu>
|
---|
23 | # From: Darrel Hankerson <hankedr@dms.auburn.edu>
|
---|
24 | # To: arnold@skeeve.com
|
---|
25 | # Subject: [stolfi@ic.unicamp.br: Bug in [...]* matching with acute-u]
|
---|
26 | # Mime-Version: 1.0 (generated by tm-edit 7.106)
|
---|
27 | # Content-Type: message/rfc822
|
---|
28 | # Status: R
|
---|
29 | #
|
---|
30 | # From: Jorge Stolfi <stolfi@ic.unicamp.br>
|
---|
31 | # To: bug-gnu-utils@gnu.org
|
---|
32 | # Subject: Bug in [...]* matching with acute-u
|
---|
33 | # MIME-Version: 1.0
|
---|
34 | # Reply-To: stolfi@ic.unicamp.br
|
---|
35 | # X-MIME-Autoconverted: from 8bit to quoted-printable by grande.dcc.unicamp.br id GAA10716
|
---|
36 | # Sender: bug-gnu-utils-admin@gnu.org
|
---|
37 | # Errors-To: bug-gnu-utils-admin@gnu.org
|
---|
38 | # X-BeenThere: bug-gnu-utils@gnu.org
|
---|
39 | # X-Mailman-Version: 2.0
|
---|
40 | # Precedence: bulk
|
---|
41 | # List-Help: <mailto:bug-gnu-utils-request@gnu.org?subject=help>
|
---|
42 | # List-Post: <mailto:bug-gnu-utils@gnu.org>
|
---|
43 | # List-Subscribe: <http://mail.gnu.org/mailman/listinfo/bug-gnu-utils>,
|
---|
44 | # <mailto:bug-gnu-utils-request@gnu.org?subject=subscribe>
|
---|
45 | # List-Id: Bug reports for the GNU utilities <bug-gnu-utils.gnu.org>
|
---|
46 | # List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/bug-gnu-utils>,
|
---|
47 | # <mailto:bug-gnu-utils-request@gnu.org?subject=unsubscribe>
|
---|
48 | # List-Archive: <http://mail.gnu.org/pipermail/bug-gnu-utils/>
|
---|
49 | # Date: Sat, 27 Jan 2001 06:46:11 -0200 (EDT)
|
---|
50 | # Content-Transfer-Encoding: 8bit
|
---|
51 | # X-MIME-Autoconverted: from quoted-printable to 8bit by manatee.dms.auburn.edu id CAA14936
|
---|
52 | # Content-Type: text/plain; charset=iso-8859-1
|
---|
53 | # <mailto:bug-gnu-utils-request@gnu.org?subject=subscribe>
|
---|
54 | # <mailto:bug-gnu-utils-request@gnu.org?subject=uns
|
---|
55 | # Content-Length: 3137
|
---|
56 | #
|
---|
57 | #
|
---|
58 | #
|
---|
59 | # Hi,
|
---|
60 | #
|
---|
61 | # I think I have run into a bug in gawk's handling of REs of the
|
---|
62 | # form [...]* when the bracketed list includes certain 8-bit characters,
|
---|
63 | # specifically u-acute (octal \372).
|
---|
64 | #
|
---|
65 | # The problem occurs in GNU Awk 3.0.4, both under
|
---|
66 | # Linux 2.2.14-5.0 (intel i686) and SunOS 5.5 (Sun sparc).
|
---|
67 | #
|
---|
68 | # Here is a program that illustrates the bug, and its output.
|
---|
69 | # The first two lines of the output should be equal, shouldn't they?
|
---|
70 | #
|
---|
71 | # ----------------------------------------------------------------------
|
---|
72 | #! /usr/bin/gawk -f
|
---|
73 |
|
---|
# Reproduction case for the report quoted in the comments above: run
# gsub() over the same fixed string with bracket expressions that do and
# do not contain u-acute, and print one labelled result line per pattern.
# Everything happens in BEGIN, so the script reads no input records.
74 | BEGIN {
|
---|
# Subject string shared by every substitution below.
75 | s = "bananas and ananases in canaan";
|
---|
# Each statement line follows the same shape: copy s into t (gsub is
# given t as its target, so s is never passed to it), replace every
# match with "AN", then print the pattern text left-justified in an
# 8-character column followed by the resulting string.
# Group 1: bracket expression followed by "*" and then "n".  The report
# above expects the next two output lines to be equal.
76 | t = s; gsub(/[an]*n/, "AN", t); printf "%-8s %s\n", "[an]*n", t;
|
---|
77 | t = s; gsub(/[anú]*n/, "AN", t); printf "%-8s %s\n", "[anú]*n", t;
|
---|
# Blank output line separating the groups.
78 | print "";
|
---|
# Same "*" form, with a bracket expression that omits "n".
79 | t = s; gsub(/[aú]*n/, "AN", t); printf "%-8s %s\n", "[aú]*n", t;
|
---|
80 | print "";
|
---|
# Group 2: bracket expression with no repetition operator, then "n".
81 | t = s; gsub(/[an]n/, "AN", t); printf "%-8s %s\n", "[an]n", t;
|
---|
82 | t = s; gsub(/[aú]n/, "AN", t); printf "%-8s %s\n", "[aú]n", t;
|
---|
83 | t = s; gsub(/[anú]n/, "AN", t); printf "%-8s %s\n", "[anú]n", t;
|
---|
84 | print "";
|
---|
# Group 3: bracket expression followed by "?" and then "n".
85 | t = s; gsub(/[an]?n/, "AN", t); printf "%-8s %s\n", "[an]?n", t;
|
---|
86 | t = s; gsub(/[aú]?n/, "AN", t); printf "%-8s %s\n", "[aú]?n", t;
|
---|
87 | t = s; gsub(/[anú]?n/, "AN", t); printf "%-8s %s\n", "[anú]?n", t;
|
---|
88 | print "";
|
---|
# Group 4: bracket expression followed by "+" and then "n".
89 | t = s; gsub(/[an]+n/, "AN", t); printf "%-8s %s\n", "[an]+n", t;
|
---|
90 | t = s; gsub(/[aú]+n/, "AN", t); printf "%-8s %s\n", "[aú]+n", t;
|
---|
91 | t = s; gsub(/[anú]+n/, "AN", t); printf "%-8s %s\n", "[anú]+n", t;
|
---|
92 | }
|
---|
93 | # ----------------------------------------------------------------------
|
---|
94 | # [an]*n bANas ANd ANases iAN cAN
|
---|
95 | # [anú]*n bananas and ananases in canaan
|
---|
96 | #
|
---|
97 | # [aú]*n bANANas ANd ANANases iAN cANAN
|
---|
98 | #
|
---|
99 | # [an]n bANANas ANd ANANases in cANaAN
|
---|
100 | # [aú]n bANANas ANd ANANases in cANaAN
|
---|
101 | # [anú]n bANANas ANd ANANases in cANaAN
|
---|
102 | #
|
---|
103 | # [an]?n bANANas ANd ANANases iAN cANaAN
|
---|
104 | # [aú]?n bANANas ANd ANANases iAN cANaAN
|
---|
105 | # [anú]?n bANANas ANd ANANases iAN cANaAN
|
---|
106 | #
|
---|
107 | # [an]+n bANas ANd ANases in cAN
|
---|
108 | # [aú]+n bANANas ANd ANANases in cANAN
|
---|
109 | # [anú]+n bananas and ananases in canaan
|
---|
110 | # ----------------------------------------------------------------------
|
---|
111 | #
|
---|
112 | # Apparently the problem is specific to u-acute; I've tried several
|
---|
113 | # other 8-bit characters and they seem to behave as expected.
|
---|
114 | #
|
---|
115 | # By comparing the second and third output lines, it would seem that the
|
---|
116 | # problem involves backtracking out of a partial match of [...]* in
|
---|
117 | # order to match the next sub-expression, when the latter begins with
|
---|
118 | # one of the given characters.
|
---|
119 | #
|
---|
120 | #
|
---|
121 | # All the best,
|
---|
122 | #
|
---|
123 | # --stolfi
|
---|
124 | #
|
---|
125 | # ------------------------------------------------------------------------
|
---|
126 | # Jorge Stolfi | http://www.dcc.unicamp.br/~stolfi | stolfi@dcc.unicamp.br
|
---|
127 | # Institute of Computing (formerly DCC-IMECC) | Wrk +55 (19)3788-5858
|
---|
128 | # Universidade Estadual de Campinas (UNICAMP) | +55 (19)3788-5840
|
---|
129 | # Av. Albert Einstein 1251 - Caixa Postal 6176 | Fax +55 (19)3788-5847
|
---|
130 | # 13083-970 Campinas, SP -- Brazil | Hom +55 (19)3287-4069
|
---|
131 | # ------------------------------------------------------------------------
|
---|
132 | #
|
---|
133 | # _______________________________________________
|
---|
134 | # Bug-gnu-utils mailing list
|
---|
135 | # Bug-gnu-utils@gnu.org
|
---|
136 | # http://mail.gnu.org/mailman/listinfo/bug-gnu-utils
|
---|
137 | #
|
---|
138 | #
|
---|