| 1 | /* | 
|---|
| 2 | * NOTE: | 
|---|
| 3 | * | 
|---|
| 4 | * This file imported from the Squid project.  The licence below is | 
|---|
| 5 | * reproduced intact, but refers to files in Squid's repository, not | 
|---|
| 6 | * in Samba.  See COPYING for the GPLv3 notice (being the later | 
|---|
| 7 | * version mentioned below). | 
|---|
| 8 | * | 
|---|
| 9 | * This file has also been modified, in particular to use talloc to | 
|---|
| 10 | * allocate in rfc1738_escape() | 
|---|
| 11 | * | 
|---|
| 12 | * - Andrew Bartlett Oct-2009 | 
|---|
| 13 | * | 
|---|
| 14 | */ | 
|---|
| 15 |  | 
|---|
| 16 |  | 
|---|
| 17 | /* | 
|---|
| 18 | * $Id$ | 
|---|
| 19 | * | 
|---|
| 20 | * DEBUG: | 
|---|
| 21 | * AUTHOR: Harvest Derived | 
|---|
| 22 | * | 
|---|
| 23 | * SQUID Web Proxy Cache          http://www.squid-cache.org/ | 
|---|
| 24 | * ---------------------------------------------------------- | 
|---|
| 25 | * | 
|---|
| 26 | *  Squid is the result of efforts by numerous individuals from | 
|---|
| 27 | *  the Internet community; see the CONTRIBUTORS file for full | 
|---|
| 28 | *  details.   Many organizations have provided support for Squid's | 
|---|
| 29 | *  development; see the SPONSORS file for full details.  Squid is | 
|---|
| 30 | *  Copyrighted (C) 2001 by the Regents of the University of | 
|---|
| 31 | *  California; see the COPYRIGHT file for full details.  Squid | 
|---|
| 32 | *  incorporates software developed and/or copyrighted by other | 
|---|
| 33 | *  sources; see the CREDITS file for full details. | 
|---|
| 34 | * | 
|---|
| 35 | *  This program is free software; you can redistribute it and/or modify | 
|---|
| 36 | *  it under the terms of the GNU General Public License as published by | 
|---|
| 37 | *  the Free Software Foundation; either version 2 of the License, or | 
|---|
| 38 | *  (at your option) any later version. | 
|---|
| 39 | * | 
|---|
| 40 | *  This program is distributed in the hope that it will be useful, | 
|---|
| 41 | *  but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 42 | *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 43 | *  GNU General Public License for more details. | 
|---|
| 44 | * | 
|---|
| 45 | *  You should have received a copy of the GNU General Public License | 
|---|
| 46 | *  along with this program; if not, write to the Free Software | 
|---|
| 47 | *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | 
|---|
| 48 | * | 
|---|
| 49 | */ | 
|---|
| 50 |  | 
|---|
| 51 | #include "includes.h" | 
|---|
| 52 |  | 
|---|
| 53 | #include "util.h" | 
|---|
| 54 |  | 
|---|
/*
 *  RFC 1738 defines that these characters should be escaped, as well
 *  as any non-US-ASCII character or anything between 0x00 - 0x1F.
 *
 *  '%' (0x25) is deliberately absent from this table: whether it is
 *  escaped depends on the escaping mode, so it is handled in code.
 *
 *  The table is read-only lookup data, hence const.
 */
static const char rfc1738_unsafe_chars[] = {
	(char) 0x3C,                /* < */
	(char) 0x3E,                /* > */
	(char) 0x22,                /* " */
	(char) 0x23,                /* # */
	(char) 0x7B,                /* { */
	(char) 0x7D,                /* } */
	(char) 0x7C,                /* | */
	(char) 0x5C,                /* \ */
	(char) 0x5E,                /* ^ */
	(char) 0x7E,                /* ~ */
	(char) 0x5B,                /* [ */
	(char) 0x5D,                /* ] */
	(char) 0x60,                /* ` */
	(char) 0x27,                /* ' */
	(char) 0x20                 /* space */
};
|---|
| 79 |  | 
|---|
/*
 *  Characters RFC 1738 reserves for special meaning inside a URL.
 *  They are only escaped when a caller asks for reserved characters
 *  to be encoded too.
 *
 *  The table is read-only lookup data, hence const.
 */
static const char rfc1738_reserved_chars[] = {
	(char) 0x3b,                /* ; */
	(char) 0x2f,                /* / */
	(char) 0x3f,                /* ? */
	(char) 0x3a,                /* : */
	(char) 0x40,                /* @ */
	(char) 0x3d,                /* = */
	(char) 0x26                 /* & */
};
|---|
| 89 |  | 
|---|
| 90 | /* | 
|---|
| 91 | *  rfc1738_escape - Returns a static buffer contains the RFC 1738 | 
|---|
| 92 | *  compliant, escaped version of the given url. | 
|---|
| 93 | * | 
|---|
| 94 | */ | 
|---|
| 95 | static char * | 
|---|
| 96 | rfc1738_do_escape(TALLOC_CTX *mem_ctx, const char *url, int encode_reserved) | 
|---|
| 97 | { | 
|---|
| 98 | size_t bufsize = 0; | 
|---|
| 99 | const char *p; | 
|---|
| 100 | char *buf; | 
|---|
| 101 | char *q; | 
|---|
| 102 | unsigned int i, do_escape; | 
|---|
| 103 |  | 
|---|
| 104 | bufsize = strlen(url) * 3 + 1; | 
|---|
| 105 | buf = talloc_array(mem_ctx, char, bufsize); | 
|---|
| 106 | if (!buf) { | 
|---|
| 107 | return NULL; | 
|---|
| 108 | } | 
|---|
| 109 |  | 
|---|
| 110 | talloc_set_name_const(buf, buf); | 
|---|
| 111 | buf[0] = '\0'; | 
|---|
| 112 |  | 
|---|
| 113 | for (p = url, q = buf; *p != '\0' && q < (buf + bufsize - 1); p++, q++) { | 
|---|
| 114 | do_escape = 0; | 
|---|
| 115 |  | 
|---|
| 116 | /* RFC 1738 defines these chars as unsafe */ | 
|---|
| 117 | for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) { | 
|---|
| 118 | if (*p == rfc1738_unsafe_chars[i]) { | 
|---|
| 119 | do_escape = 1; | 
|---|
| 120 | break; | 
|---|
| 121 | } | 
|---|
| 122 | } | 
|---|
| 123 | /* Handle % separately */ | 
|---|
| 124 | if (encode_reserved >= 0 && *p == '%') | 
|---|
| 125 | do_escape = 1; | 
|---|
| 126 | /* RFC 1738 defines these chars as reserved */ | 
|---|
| 127 | for (i = 0; i < sizeof(rfc1738_reserved_chars) && encode_reserved > 0; i++) { | 
|---|
| 128 | if (*p == rfc1738_reserved_chars[i]) { | 
|---|
| 129 | do_escape = 1; | 
|---|
| 130 | break; | 
|---|
| 131 | } | 
|---|
| 132 | } | 
|---|
| 133 | /* RFC 1738 says any control chars (0x00-0x1F) are encoded */ | 
|---|
| 134 | if ((unsigned char) *p <= (unsigned char) 0x1F) { | 
|---|
| 135 | do_escape = 1; | 
|---|
| 136 | } | 
|---|
| 137 | /* RFC 1738 says 0x7f is encoded */ | 
|---|
| 138 | if (*p == (char) 0x7F) { | 
|---|
| 139 | do_escape = 1; | 
|---|
| 140 | } | 
|---|
| 141 | /* RFC 1738 says any non-US-ASCII are encoded */ | 
|---|
| 142 | if (((unsigned char) *p >= (unsigned char) 0x80)) { | 
|---|
| 143 | do_escape = 1; | 
|---|
| 144 | } | 
|---|
| 145 | /* Do the triplet encoding, or just copy the char */ | 
|---|
| 146 | /* note: while we do not need snprintf here as q is appropriately | 
|---|
| 147 | * allocated, Samba does to avoid our macro banning it -- abartlet */ | 
|---|
| 148 |  | 
|---|
| 149 | if (do_escape == 1) { | 
|---|
| 150 | (void) snprintf(q, 4, "%%%02X", (unsigned char) *p); | 
|---|
| 151 | q += sizeof(char) * 2; | 
|---|
| 152 | } else { | 
|---|
| 153 | *q = *p; | 
|---|
| 154 | } | 
|---|
| 155 | } | 
|---|
| 156 | *q = '\0'; | 
|---|
| 157 | return (buf); | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | /* | 
|---|
| 161 | * rfc1738_escape - Returns a buffer that contains the RFC | 
|---|
| 162 | * 1738 compliant, escaped version of the given url. (escapes unsafe and % characters) | 
|---|
| 163 | */ | 
|---|
| 164 | char * | 
|---|
| 165 | rfc1738_escape(TALLOC_CTX *mem_ctx, const char *url) | 
|---|
| 166 | { | 
|---|
| 167 | return rfc1738_do_escape(mem_ctx, url, 0); | 
|---|
| 168 | } | 
|---|
| 169 |  | 
|---|
| 170 | /* | 
|---|
| 171 | * rfc1738_escape_unescaped - Returns a buffer that contains | 
|---|
| 172 | * the RFC 1738 compliant, escaped version of the given url (escapes unsafe chars only) | 
|---|
| 173 | */ | 
|---|
| 174 | char * | 
|---|
| 175 | rfc1738_escape_unescaped(TALLOC_CTX *mem_ctx, const char *url) | 
|---|
| 176 | { | 
|---|
| 177 | return rfc1738_do_escape(mem_ctx, url, -1); | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | /* | 
|---|
| 181 | * rfc1738_escape_part - Returns a buffer that contains the RFC | 
|---|
| 182 | * 1738 compliant, escaped version of the given url segment. (escapes | 
|---|
| 183 | * unsafe, reserved and % chars) It would mangle the :// in http://, | 
|---|
| 184 | * and mangle paths (because of /). | 
|---|
| 185 | */ | 
|---|
| 186 | char * | 
|---|
| 187 | rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url) | 
|---|
| 188 | { | 
|---|
| 189 | return rfc1738_do_escape(mem_ctx, url, 1); | 
|---|
| 190 | } | 
|---|
| 191 |  | 
|---|
/*
 *  rfc1738_hex_value - Returns the value (0-15) of the hexadecimal
 *  digit c, or -1 if c is not a hexadecimal digit.
 */
static int
rfc1738_hex_value(char c)
{
	if (c >= '0' && c <= '9') {
		return c - '0';
	}
	if (c >= 'a' && c <= 'f') {
		return c - 'a' + 10;
	}
	if (c >= 'A' && c <= 'F') {
		return c - 'A' + 10;
	}
	return -1;
}

/*
 *  rfc1738_unescape() - Converts escaped characters (%xy numbers) in
 *  the given string, in place.  The result is never longer than the
 *  input and is always NUL terminated.
 *
 *    %%   becomes a single '%'.
 *    %ab  becomes the byte with the 8-bit hexadecimal value "ab"
 *         (upper or lower case digits).
 *    %00  is never turned into a NUL byte, as that would truncate the
 *         string: the '%' is kept and the two zeros are dropped.
 *
 *  A '%' that is not followed by two hexadecimal digits is not an
 *  escape; it and the characters after it are copied unchanged.
 *
 *  s must be a writable, NUL terminated string.
 */
_PUBLIC_ void
rfc1738_unescape(char *s)
{
	size_t i = 0;               /* write position */
	size_t j = 0;               /* read position */

	for (; s[j] != '\0'; i++, j++) {
		int hi;
		int lo;

		s[i] = s[j];
		if (s[j] != '%') {
			continue;
		}
		if (s[j + 1] == '%') {  /* %% case */
			j++;
			continue;
		}

		/*
		 * Both characters must be real hex digits.  The NUL
		 * terminator is not a hex digit, so s[j + 2] is only read
		 * once s[j + 1] is known to be inside the string.
		 */
		hi = rfc1738_hex_value(s[j + 1]);
		if (hi < 0) {
			continue;
		}
		lo = rfc1738_hex_value(s[j + 2]);
		if (lo < 0) {
			continue;
		}

		if (hi == 0 && lo == 0) {   /* %00 case: keep the '%' */
			j += 2;
			continue;
		}

		s[i] = (char) ((hi << 4) | lo);
		j += 2;
	}
	s[i] = '\0';
}
|---|