| 1 | #!/usr/local/bin/python
|
|---|
| 2 | # -*- coding: iso-8859-1 -*-
|
|---|
| 3 |
|
|---|
| 4 | # $Id$
|
|---|
| 5 |
|
|---|
| 6 | # Copyright (c) 2004, 2008 Kungliga Tekniska Högskolan
|
|---|
| 7 | # (Royal Institute of Technology, Stockholm, Sweden).
|
|---|
| 8 | # All rights reserved.
|
|---|
| 9 | #
|
|---|
| 10 | # Redistribution and use in source and binary forms, with or without
|
|---|
| 11 | # modification, are permitted provided that the following conditions
|
|---|
| 12 | # are met:
|
|---|
| 13 | #
|
|---|
| 14 | # 1. Redistributions of source code must retain the above copyright
|
|---|
| 15 | # notice, this list of conditions and the following disclaimer.
|
|---|
| 16 | #
|
|---|
| 17 | # 2. Redistributions in binary form must reproduce the above copyright
|
|---|
| 18 | # notice, this list of conditions and the following disclaimer in the
|
|---|
| 19 | # documentation and/or other materials provided with the distribution.
|
|---|
| 20 | #
|
|---|
| 21 | # 3. Neither the name of the Institute nor the names of its contributors
|
|---|
| 22 | # may be used to endorse or promote products derived from this software
|
|---|
| 23 | # without specific prior written permission.
|
|---|
| 24 | #
|
|---|
| 25 | # THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
|---|
| 26 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|---|
| 27 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|---|
| 28 | # ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
|---|
| 29 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|---|
| 30 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|---|
| 31 | # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|---|
| 32 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|---|
| 33 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|---|
| 34 | # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|---|
| 35 | # SUCH DAMAGE.
|
|---|
| 36 |
|
|---|
| 37 | import re
|
|---|
| 38 | import string
|
|---|
| 39 |
|
|---|
def read():
    """Return a dict of the RFC 4518 string-preparation tables.

    The result has two keys:

    ``"rfc4518-map"``
        Entries for the "Map" step (RFC 4518 section 2.2).  Each entry is
        a string of the form ``" <code point or range>; <replacement>;
        <comment>"``, where an empty replacement field means the code
        point is mapped to nothing.

    ``"rfc4518-error"``
        Entries for the "Prohibit" step (RFC 4518 section 2.4), of the
        form ``" <code point>;"``.

    A new dict holding new lists is built on every call, so callers may
    modify the result freely.
    """

    # 2.2. Map
    map_table = [
        # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
        # points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F)
        # and VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are
        # also mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC)
        # is mapped to nothing.
        #
        # NOTE: the RFC text quoted by the original comment gives the second
        # variation selector range as "FF00-FE0F"; the entries below cover
        # FE00-FE0F.
        " 00AD; ; Map to nothing",
        " 1806; ; Map to nothing",
        " 034F; ; Map to nothing",

        " 180B; ; Map to nothing",
        " 180C; ; Map to nothing",
        " 180D; ; Map to nothing",

        " FE00; ; Map to nothing",
        " FE01; ; Map to nothing",
        " FE02; ; Map to nothing",
        " FE03; ; Map to nothing",
        " FE04; ; Map to nothing",
        " FE05; ; Map to nothing",
        " FE06; ; Map to nothing",
        " FE07; ; Map to nothing",
        " FE08; ; Map to nothing",
        " FE09; ; Map to nothing",
        " FE0A; ; Map to nothing",
        " FE0B; ; Map to nothing",
        " FE0C; ; Map to nothing",
        " FE0D; ; Map to nothing",
        " FE0E; ; Map to nothing",
        " FE0F; ; Map to nothing",

        " FFFC; ; Map to nothing",

        # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
        # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
        " 0009; 0020 ; Map to SPACE",
        " 000A; 0020 ; Map to SPACE",
        " 000B; 0020 ; Map to SPACE",
        " 000C; 0020 ; Map to SPACE",
        " 000D; 0020 ; Map to SPACE",
        " 0085; 0020 ; Map to SPACE",

        # All other control code (e.g., Cc) points or code points with a
        # control function (e.g., Cf) are mapped to nothing.  The following
        # is a complete list of these code points: U+0000-0008, 000E-001F,
        # 007F-0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E,
        # 2060-2063, 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001,
        # E0020-E007F.
        " 0000-0008; ; Map to nothing",
        " 000E-001F; ; Map to nothing",
        " 007F-0084; ; Map to nothing",
        " 0086-009F; ; Map to nothing",
        " 06DD; ; Map to nothing",
        " 070F; ; Map to nothing",
        " 180E; ; Map to nothing",
        " 200C-200F; ; Map to nothing",
        " 202A-202E; ; Map to nothing",
        " 2060-2063; ; Map to nothing",
        " 206A-206F; ; Map to nothing",
        " FEFF; ; Map to nothing",
        " FFF9-FFFB; ; Map to nothing",
        " 1D173-1D17A; ; Map to nothing",
        " E0001; ; Map to nothing",
        " E0020-E007F; ; Map to nothing",

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
        # points with Separator (space, line, or paragraph) property (e.g.,
        # Zs, Zl, or Zp) are mapped to SPACE (U+0020).  The following is a
        # complete list of these code points: U+0020, 00A0, 1680, 2000-200A,
        # 2028-2029, 202F, 205F, 3000.
        " 200B; ; Map to nothing",
        " 0020; 0020; Map to SPACE",
        " 00A0; 0020; Map to SPACE",
        " 1680; 0020; Map to SPACE",
        " 2000-200A; 0020; Map to SPACE",
        " 2028-2029; 0020; Map to SPACE",
        " 202F; 0020; Map to SPACE",
        " 205F; 0020; Map to SPACE",
        " 3000; 0020; Map to SPACE",
    ]

    # For case ignore, numeric, and stored prefix string matching rules,
    # characters are case folded per B.2 of [RFC3454].  No entries are
    # emitted here for that step.

    # 2.4. Prohibit
    error_table = [
        # The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.
        " FFFD;",
    ]

    return {
        "rfc4518-map": map_table,
        "rfc4518-error": error_table,
    }
|
|---|