Changeset 97 for trunk/src/helpers/encodings.c
- Timestamp:
- Aug 12, 2001, 5:34:51 PM (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/helpers/encodings.c
/*
 *@@ encDecodeUTF8:
 *      decodes one UTF-8 character and returns
 *      the Unicode value, or -1 (that is,
 *      (unsigned long)-1) if the character is
 *      invalid.
 *
 *      On input, *ppch is assumed to point to
 *      the first byte of the UTF-8 char to be
 *      read, inside a null-terminated string.
 *
 *      This function will advance *ppch by at
 *      least one byte (or more if the UTF-8
 *      char initially pointed to introduces
 *      a multi-byte sequence).
 *
 *      This returns -1 if *ppch points to an
 *      invalid encoding: a lead byte in the
 *      range 0x80-0xc1 or 0xfe-0xff, or a
 *      multi-byte sequence in which one of the
 *      trailing bytes is not of the form
 *      10xxxxxx. In that case *ppch is advanced
 *      to the next byte that has bit 7 clear
 *      (possibly the terminating null byte),
 *      so the next call resumes there.
 *
 *      This returns 0 if **ppch points to a
 *      null character; *ppch is not advanced
 *      then.
 *
 *      Sequences of up to six bytes are decoded.
 *      Apart from the reserved lead bytes 0xc0
 *      and 0xc1, overlong encodings are not
 *      rejected.
 *
 *@@added V0.9.14 (2001-08-09) [umoeller]
 */

unsigned long encDecodeUTF8(const char **ppch)
{
    // read the bytes as unsigned: plain "char" may be signed,
    // and a sign-extended lead byte would break the range
    // checks below
    const unsigned char *pb = (const unsigned char *)*ppch;
    unsigned long       ulChar = *pb;
    unsigned long       ulCount;        // sequence length; 0 means "illegal"
    unsigned long       ul2;

    if (!ulChar)
        return 0;

    if (ulChar < 0x80)
    {
        // simple, one byte only... use that
        (*ppch)++;
        return ulChar;
    }

    // note: 0xc0 and 0xc1 are reserved and
    // cannot appear as the first UTF-8 byte

    if (    (ulChar >= 0xc2)
         && (ulChar < 0xe0)
       )
    {
        // that's two bytes
        ulCount = 2;
        ulChar &= 0x1f;
    }
    else if ((ulChar & 0xf0) == 0xe0)
    {
        // three bytes
        ulCount = 3;
        ulChar &= 0x0f;
    }
    else if ((ulChar & 0xf8) == 0xf0)
    {
        // four bytes
        ulCount = 4;
        ulChar &= 0x07;
    }
    else if ((ulChar & 0xfc) == 0xf8)
    {
        // five bytes
        ulCount = 5;
        ulChar &= 0x03;
    }
    else if ((ulChar & 0xfe) == 0xfc)
    {
        // six bytes
        ulCount = 6;
        ulChar &= 0x01;
    }
    else
        // stray continuation byte, 0xc0, 0xc1, 0xfe or 0xff
        ulCount = 0;

    // go for the second and more bytes then
    for (ul2 = 1;
         ul2 < ulCount;
         ++ul2)
    {
        unsigned long ulChar2 = pb[ul2];

        // every trailing byte must be 10xxxxxx; this also
        // stops at the terminating null byte, so we never
        // read beyond the end of the string
        if ((ulChar2 & 0xc0) != 0x80)
        {
            ulCount = 0;
            break;
        }

        ulChar = (ulChar << 6) | (ulChar2 & 0x3f);
    }

    if (ulCount)
    {
        *ppch += ulCount;
        return ulChar;
    }

    // illegal: skip all the following bytes until we find
    // something with bit 7 off (the null byte qualifies)
    do
    {
        ++pb;
    } while (*pb & 0x80);

    *ppch = (const char *)pb;

    return (unsigned long)-1;
}
Note:
See TracChangeset
for help on using the changeset viewer.