| 1 | /*
|
|---|
| 2 | * NOTE:
|
|---|
| 3 | *
|
|---|
| 4 | * This file imported from the Squid project. The licence below is
|
|---|
| 5 | * reproduced intact, but refers to files in Squid's repository, not
|
|---|
| 6 | * in Samba. See COPYING for the GPLv3 notice (being the later
|
|---|
| 7 | * version mentioned below).
|
|---|
| 8 | *
|
|---|
| 9 | * This file has also been modified, in particular to use talloc to
|
|---|
| 10 | * allocate in rfc1738_escape()
|
|---|
| 11 | *
|
|---|
| 12 | * - Andrew Bartlett Oct-2009
|
|---|
| 13 | *
|
|---|
| 14 | */
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 | /*
|
|---|
| 18 | * $Id$
|
|---|
| 19 | *
|
|---|
| 20 | * DEBUG:
|
|---|
| 21 | * AUTHOR: Harvest Derived
|
|---|
| 22 | *
|
|---|
| 23 | * SQUID Web Proxy Cache http://www.squid-cache.org/
|
|---|
| 24 | * ----------------------------------------------------------
|
|---|
| 25 | *
|
|---|
| 26 | * Squid is the result of efforts by numerous individuals from
|
|---|
| 27 | * the Internet community; see the CONTRIBUTORS file for full
|
|---|
| 28 | * details. Many organizations have provided support for Squid's
|
|---|
| 29 | * development; see the SPONSORS file for full details. Squid is
|
|---|
| 30 | * Copyrighted (C) 2001 by the Regents of the University of
|
|---|
| 31 | * California; see the COPYRIGHT file for full details. Squid
|
|---|
| 32 | * incorporates software developed and/or copyrighted by other
|
|---|
| 33 | * sources; see the CREDITS file for full details.
|
|---|
| 34 | *
|
|---|
| 35 | * This program is free software; you can redistribute it and/or modify
|
|---|
| 36 | * it under the terms of the GNU General Public License as published by
|
|---|
| 37 | * the Free Software Foundation; either version 2 of the License, or
|
|---|
| 38 | * (at your option) any later version.
|
|---|
| 39 | *
|
|---|
| 40 | * This program is distributed in the hope that it will be useful,
|
|---|
| 41 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 42 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 43 | * GNU General Public License for more details.
|
|---|
| 44 | *
|
|---|
| 45 | * You should have received a copy of the GNU General Public License
|
|---|
| 46 | * along with this program; if not, write to the Free Software
|
|---|
| 47 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
|
|---|
| 48 | *
|
|---|
| 49 | */
|
|---|
| 50 |
|
|---|
| 51 | #include "includes.h"
|
|---|
| 52 |
|
|---|
| 53 | #include "util.h"
|
|---|
| 54 |
|
|---|
/*
 * RFC 1738 defines that these characters should be escaped, as well
 * as any non-US-ASCII character or anything between 0x00 - 0x1F.
 *
 * '%' (0x25) is deliberately not listed here: the escaping code
 * handles it separately.
 *
 * The table is only ever read, so it is const.
 */
static const char rfc1738_unsafe_chars[] = {
	(char) 0x3C,		/* < */
	(char) 0x3E,		/* > */
	(char) 0x22,		/* " */
	(char) 0x23,		/* # */
	(char) 0x7B,		/* { */
	(char) 0x7D,		/* } */
	(char) 0x7C,		/* | */
	(char) 0x5C,		/* \ */
	(char) 0x5E,		/* ^ */
	(char) 0x7E,		/* ~ */
	(char) 0x5B,		/* [ */
	(char) 0x5D,		/* ] */
	(char) 0x60,		/* ` */
	(char) 0x27,		/* ' */
	(char) 0x20		/* space */
};
|
|---|
| 79 |
|
|---|
/*
 * Characters that RFC 1738 treats as reserved. The table is only ever
 * read, so it is const.
 */
static const char rfc1738_reserved_chars[] = {
	(char) 0x3b,		/* ; */
	(char) 0x2f,		/* / */
	(char) 0x3f,		/* ? */
	(char) 0x3a,		/* : */
	(char) 0x40,		/* @ */
	(char) 0x3d,		/* = */
	(char) 0x26		/* & */
};
|
|---|
| 89 |
|
|---|
| 90 | /*
|
|---|
| 91 | * rfc1738_escape - Returns a static buffer contains the RFC 1738
|
|---|
| 92 | * compliant, escaped version of the given url.
|
|---|
| 93 | *
|
|---|
| 94 | */
|
|---|
| 95 | static char *
|
|---|
| 96 | rfc1738_do_escape(TALLOC_CTX *mem_ctx, const char *url, int encode_reserved)
|
|---|
| 97 | {
|
|---|
| 98 | size_t bufsize = 0;
|
|---|
| 99 | const char *p;
|
|---|
| 100 | char *buf;
|
|---|
| 101 | char *q;
|
|---|
| 102 | unsigned int i, do_escape;
|
|---|
| 103 |
|
|---|
| 104 | bufsize = strlen(url) * 3 + 1;
|
|---|
| 105 | buf = talloc_array(mem_ctx, char, bufsize);
|
|---|
| 106 | if (!buf) {
|
|---|
| 107 | return NULL;
|
|---|
| 108 | }
|
|---|
| 109 |
|
|---|
| 110 | talloc_set_name_const(buf, buf);
|
|---|
| 111 | buf[0] = '\0';
|
|---|
| 112 |
|
|---|
| 113 | for (p = url, q = buf; *p != '\0' && q < (buf + bufsize - 1); p++, q++) {
|
|---|
| 114 | do_escape = 0;
|
|---|
| 115 |
|
|---|
| 116 | /* RFC 1738 defines these chars as unsafe */
|
|---|
| 117 | for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) {
|
|---|
| 118 | if (*p == rfc1738_unsafe_chars[i]) {
|
|---|
| 119 | do_escape = 1;
|
|---|
| 120 | break;
|
|---|
| 121 | }
|
|---|
| 122 | }
|
|---|
| 123 | /* Handle % separately */
|
|---|
| 124 | if (encode_reserved >= 0 && *p == '%')
|
|---|
| 125 | do_escape = 1;
|
|---|
| 126 | /* RFC 1738 defines these chars as reserved */
|
|---|
| 127 | for (i = 0; i < sizeof(rfc1738_reserved_chars) && encode_reserved > 0; i++) {
|
|---|
| 128 | if (*p == rfc1738_reserved_chars[i]) {
|
|---|
| 129 | do_escape = 1;
|
|---|
| 130 | break;
|
|---|
| 131 | }
|
|---|
| 132 | }
|
|---|
| 133 | /* RFC 1738 says any control chars (0x00-0x1F) are encoded */
|
|---|
| 134 | if ((unsigned char) *p <= (unsigned char) 0x1F) {
|
|---|
| 135 | do_escape = 1;
|
|---|
| 136 | }
|
|---|
| 137 | /* RFC 1738 says 0x7f is encoded */
|
|---|
| 138 | if (*p == (char) 0x7F) {
|
|---|
| 139 | do_escape = 1;
|
|---|
| 140 | }
|
|---|
| 141 | /* RFC 1738 says any non-US-ASCII are encoded */
|
|---|
| 142 | if (((unsigned char) *p >= (unsigned char) 0x80)) {
|
|---|
| 143 | do_escape = 1;
|
|---|
| 144 | }
|
|---|
| 145 | /* Do the triplet encoding, or just copy the char */
|
|---|
| 146 | /* note: while we do not need snprintf here as q is appropriately
|
|---|
| 147 | * allocated, Samba does to avoid our macro banning it -- abartlet */
|
|---|
| 148 |
|
|---|
| 149 | if (do_escape == 1) {
|
|---|
| 150 | (void) snprintf(q, 4, "%%%02X", (unsigned char) *p);
|
|---|
| 151 | q += sizeof(char) * 2;
|
|---|
| 152 | } else {
|
|---|
| 153 | *q = *p;
|
|---|
| 154 | }
|
|---|
| 155 | }
|
|---|
| 156 | *q = '\0';
|
|---|
| 157 | return (buf);
|
|---|
| 158 | }
|
|---|
| 159 |
|
|---|
| 160 | /*
|
|---|
| 161 | * rfc1738_escape - Returns a buffer that contains the RFC
|
|---|
| 162 | * 1738 compliant, escaped version of the given url. (escapes unsafe and % characters)
|
|---|
| 163 | */
|
|---|
| 164 | char *
|
|---|
| 165 | rfc1738_escape(TALLOC_CTX *mem_ctx, const char *url)
|
|---|
| 166 | {
|
|---|
| 167 | return rfc1738_do_escape(mem_ctx, url, 0);
|
|---|
| 168 | }
|
|---|
| 169 |
|
|---|
| 170 | /*
|
|---|
| 171 | * rfc1738_escape_unescaped - Returns a buffer that contains
|
|---|
| 172 | * the RFC 1738 compliant, escaped version of the given url (escapes unsafe chars only)
|
|---|
| 173 | */
|
|---|
| 174 | char *
|
|---|
| 175 | rfc1738_escape_unescaped(TALLOC_CTX *mem_ctx, const char *url)
|
|---|
| 176 | {
|
|---|
| 177 | return rfc1738_do_escape(mem_ctx, url, -1);
|
|---|
| 178 | }
|
|---|
| 179 |
|
|---|
| 180 | /*
|
|---|
| 181 | * rfc1738_escape_part - Returns a buffer that contains the RFC
|
|---|
| 182 | * 1738 compliant, escaped version of the given url segment. (escapes
|
|---|
| 183 | * unsafe, reserved and % chars) It would mangle the :// in http://,
|
|---|
| 184 | * and mangle paths (because of /).
|
|---|
| 185 | */
|
|---|
| 186 | char *
|
|---|
| 187 | rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url)
|
|---|
| 188 | {
|
|---|
| 189 | return rfc1738_do_escape(mem_ctx, url, 1);
|
|---|
| 190 | }
|
|---|
| 191 |
|
|---|
/*
 * rfc1738_hex_value - Returns the value (0-15) of the hexadecimal
 * digit c, or -1 if c is not a hexadecimal digit.
 */
static int
rfc1738_hex_value(char c)
{
	if (c >= '0' && c <= '9') {
		return c - '0';
	}
	if (c >= 'a' && c <= 'f') {
		return c - 'a' + 10;
	}
	if (c >= 'A' && c <= 'F') {
		return c - 'A' + 10;
	}
	return -1;
}

/*
 * rfc1738_unescape() - Converts escaped characters (%xy numbers) in
 * the given string, in place. The result is never longer than the input.
 *
 *  - "%%" becomes a single '%'.
 *  - "%ab", where a and b are both hexadecimal digits, becomes the
 *    8-bit character with that value.
 *  - "%00" is not decoded to a NUL: the '%' is kept and the two zeros
 *    are dropped.
 *  - A '%' followed by anything else (including the end of the string)
 *    is copied unchanged.
 *
 * Only genuine hexadecimal digits are accepted after '%'; whitespace
 * or a sign (e.g. "% 1" or "%-1") is not treated as a number.
 *
 * A NULL s is ignored.
 */
_PUBLIC_ void
rfc1738_unescape(char *s)
{
	size_t i, j;		/* i is write, j is read */

	if (s == NULL) {
		return;
	}

	for (i = j = 0; s[j] != '\0'; i++, j++) {
		int hi, lo;

		s[i] = s[j];
		if (s[j] != '%') {
			continue;
		}
		if (s[j + 1] == '%') {	/* %% case */
			j++;
			continue;
		}
		/* Not enough characters left for a %xy triplet */
		if (s[j + 1] == '\0' || s[j + 2] == '\0') {
			continue;
		}
		if (s[j + 1] == '0' && s[j + 2] == '0') {	/* %00 case */
			j += 2;
			continue;
		}
		hi = rfc1738_hex_value(s[j + 1]);
		lo = rfc1738_hex_value(s[j + 2]);
		if (hi >= 0 && lo >= 0) {
			s[i] = (char) ((hi << 4) | lo);
			j += 2;
		}
	}
	s[i] = '\0';
}
|
|---|