Changeset 388 for python/vendor/current/Modules/cjkcodecs
- Timestamp:
- Mar 19, 2014, 11:11:30 AM (11 years ago)
- Location:
- python/vendor/current/Modules/cjkcodecs
- Files:
-
- 11 edited
Legend:
- Unmodified
- Added
- Removed
-
python/vendor/current/Modules/cjkcodecs/_codecs_cn.c
r2 r388 18 18 /* GBK and GB2312 map differently in few codepoints that are listed below: 19 19 * 20 * gb2312gbk21 * A1A4 U+30FB KATAKANA MIDDLE DOTU+00B7 MIDDLE DOT22 * A1AA U+2015 HORIZONTAL BARU+2014 EM DASH23 * A844 undefinedU+2015 HORIZONTAL BAR20 * gb2312 gbk 21 * A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT 22 * A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH 23 * A844 undefined U+2015 HORIZONTAL BAR 24 24 */ 25 25 26 26 #define GBK_DECODE(dc1, dc2, assi) \ 27 28 29 30 31 27 if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \ 28 else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \ 29 else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \ 30 else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \ 31 else TRYMAP_DEC(gbkext, assi, dc1, dc2); 32 32 33 33 #define GBK_ENCODE(code, assi) \ 34 35 36 37 34 if ((code) == 0x2014) (assi) = 0xa1aa; \ 35 else if ((code) == 0x2015) (assi) = 0xa844; \ 36 else if ((code) == 0x00b7) (assi) = 0xa1a4; \ 37 else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code)); 38 38 39 39 /* … … 43 43 ENCODER(gb2312) 44 44 { 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 45 while (inleft > 0) { 46 Py_UNICODE c = IN1; 47 DBCHAR code; 48 49 if (c < 0x80) { 50 WRITE1((unsigned char)c) 51 NEXT(1, 1) 52 continue; 53 } 54 UCS4INVALID(c) 55 56 REQUIRE_OUTBUF(2) 57 TRYMAP_ENC(gbcommon, code, c); 58 else return 1; 59 60 if (code & 0x8000) /* MSB set: GBK */ 61 return 1; 62 63 OUT1((code >> 8) | 0x80) 64 OUT2((code & 0xFF) | 0x80) 65 NEXT(1, 2) 66 } 67 68 return 0; 69 69 } 70 70 71 71 DECODER(gb2312) 72 72 { 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 73 while (inleft > 0) { 74 unsigned char c = **inbuf; 75 76 REQUIRE_OUTBUF(1) 77 78 if (c < 0x80) { 79 OUT1(c) 80 NEXT(1, 1) 81 continue; 82 } 83 84 REQUIRE_INBUF(2) 85 TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { 86 NEXT(2, 1) 87 } 88 else return 2; 89 } 90 91 return 0; 92 92 } 93 93 … … 99 99 ENCODER(gbk) 100 100 { 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 101 while (inleft > 0) { 102 Py_UNICODE c = IN1; 103 DBCHAR code; 104 105 if (c < 0x80) { 106 WRITE1((unsigned char)c) 107 NEXT(1, 1) 108 continue; 109 } 110 UCS4INVALID(c) 111 112 REQUIRE_OUTBUF(2) 113 114 GBK_ENCODE(c, code) 115 else return 1; 116 117 OUT1((code >> 8) | 0x80) 118 if (code & 0x8000) 119 OUT2((code & 0xFF)) /* MSB set: GBK */ 120 else 121 OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ 122 NEXT(1, 2) 123 } 124 125 return 0; 126 126 } 127 127 128 128 DECODER(gbk) 129 129 { 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 130 while (inleft > 0) { 131 unsigned char c = IN1; 132 133 REQUIRE_OUTBUF(1) 134 135 if (c < 0x80) { 136 OUT1(c) 137 NEXT(1, 1) 138 continue; 139 } 140 141 REQUIRE_INBUF(2) 142 143 GBK_DECODE(c, IN2, **outbuf) 144 else return 2; 145 146 NEXT(2, 1) 147 } 148 149 return 0; 150 150 } 151 151 … … 157 157 ENCODER(gb18030) 158 158 { 159 160 161 162 163 164 165 166 167 168 169 170 159 while (inleft > 0) { 160 ucs4_t c = IN1; 161 DBCHAR code; 162 163 if (c < 0x80) { 164 WRITE1(c) 165 NEXT(1, 1) 166 continue; 167 } 168 169 DECODE_SURROGATE(c) 170 if (c > 0x10FFFF) 171 171 #if Py_UNICODE_SIZE == 2 172 172 return 2; /* surrogates pair */ 173 173 #else 174 174 return 1; 175 175 #endif 176 177 178 179 180 181 182 183 184 185 186 187 176 else if (c >= 0x10000) { 177 ucs4_t tc = c - 0x10000; 178 179 REQUIRE_OUTBUF(4) 180 181 OUT4((unsigned char)(tc % 10) + 0x30) 182 tc /= 10; 183 OUT3((unsigned char)(tc % 126) + 0x81) 184 tc /= 126; 185 OUT2((unsigned char)(tc % 10) + 0x30) 186 tc /= 10; 187 OUT1((unsigned char)(tc + 0x90)) 188 188 189 189 #if Py_UNICODE_SIZE == 2 190 190 NEXT(2, 4) /* surrogates pair */ 191 191 #else 192 192 NEXT(1, 4) 193 193 #endif 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 194 continue; 195 } 196 197 REQUIRE_OUTBUF(2) 198 199 GBK_ENCODE(c, code) 200 else TRYMAP_ENC(gb18030ext, code, c); 201 else { 202 const struct _gb18030_to_unibmp_ranges *utrrange; 203 204 REQUIRE_OUTBUF(4) 205 206 for (utrrange = gb18030_to_unibmp_ranges; 207 utrrange->first != 0; 208 utrrange++) 209 if (utrrange->first <= c && 210 c <= utrrange->last) { 211 Py_UNICODE tc; 212 213 tc = c - utrrange->first + 214 utrrange->base; 215 216 OUT4((unsigned char)(tc % 10) + 0x30) 217 tc /= 10; 218 OUT3((unsigned char)(tc % 126) + 0x81) 219 tc /= 126; 220 OUT2((unsigned char)(tc % 10) + 0x30) 221 tc /= 10; 222 OUT1((unsigned char)tc + 0x81) 223 224 NEXT(1, 4) 225 break; 226 } 227 228 if (utrrange->first == 0) 229 return 1; 230 continue; 231 } 232 233 OUT1((code >> 8) | 0x80) 234 if (code & 0x8000) 235 OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */ 236 else 237 OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ 238 239 NEXT(1, 2) 240 } 241 242 return 0; 243 243 } 244 244 245 245 DECODER(gb18030) 246 246 { 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 247 while (inleft > 0) { 248 unsigned char c = IN1, c2; 249 250 REQUIRE_OUTBUF(1) 251 252 if (c < 0x80) { 253 OUT1(c) 254 NEXT(1, 1) 255 continue; 256 } 257 258 REQUIRE_INBUF(2) 259 260 c2 = IN2; 261 if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ 262 const struct _gb18030_to_unibmp_ranges *utr; 263 unsigned char c3, c4; 264 ucs4_t lseq; 265 266 REQUIRE_INBUF(4) 267 c3 = IN3; 268 c4 = IN4; 269 if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) 270 return 4; 271 c -= 0x81; c2 -= 0x30; 272 c3 -= 0x81; c4 -= 0x30; 273 274 if (c < 4) { /* U+0080 - U+FFFF */ 275 lseq = ((ucs4_t)c * 10 + c2) * 1260 + 276 (ucs4_t)c3 * 10 + c4; 277 if (lseq < 39420) { 278 for (utr = gb18030_to_unibmp_ranges; 279 lseq >= (utr + 1)->base; 280 utr++) ; 281 OUT1(utr->first - utr->base + lseq) 282 NEXT(4, 1) 283 continue; 284 } 285 } 286 else if (c >= 15) { /* U+10000 - U+10FFFF */ 287 lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2) 288 * 1260 + (ucs4_t)c3 * 10 + c4; 289 if (lseq <= 0x10FFFF) { 290 WRITEUCS4(lseq); 291 NEXT_IN(4) 292 continue; 293 } 294 } 295 return 4; 296 } 297 298 GBK_DECODE(c, c2, **outbuf) 299 else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); 300 else return 2; 301 302 NEXT(2, 1) 303 } 304 305 return 0; 306 306 } 307 307 … … 313 313 ENCODER_INIT(hz) 314 314 { 315 316 315 state->i = 0; 316 return 0; 317 317 } 318 318 319 319 ENCODER_RESET(hz) 320 320 { 321 322 323 324 325 326 321 if (state->i != 0) { 322 WRITE2('~', '}') 323 state->i = 0; 324 NEXT_OUT(2) 325 } 326 return 0; 327 327 } 328 328 329 329 ENCODER(hz) 330 330 { 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 331 while (inleft > 0) { 332 Py_UNICODE c = IN1; 333 DBCHAR code; 334 335 if (c < 0x80) { 336 if (state->i == 0) { 337 WRITE1((unsigned char)c) 338 NEXT(1, 1) 339 } 340 else { 341 WRITE3('~', '}', (unsigned char)c) 342 NEXT(1, 3) 343 state->i = 0; 344 } 345 continue; 346 } 347 348 UCS4INVALID(c) 349 350 TRYMAP_ENC(gbcommon, code, c); 351 else return 1; 352 353 if (code & 0x8000) /* MSB set: GBK */ 354 return 1; 355 356 if (state->i == 0) { 357 WRITE4('~', '{', code >> 8, code & 0xff) 358 NEXT(1, 4) 359 state->i = 1; 360 } 361 else { 362 WRITE2(code >> 8, code & 0xff) 363 NEXT(1, 2) 364 } 365 } 366 367 return 0; 368 368 } 369 369 370 370 DECODER_INIT(hz) 371 371 { 372 373 372 state->i = 0; 373 return 0; 374 374 } 375 375 376 376 DECODER_RESET(hz) 377 377 { 378 379 378 state->i = 0; 379 return 0; 380 380 } 381 381 382 382 DECODER(hz) 383 383 { 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 384 while (inleft > 0) { 385 unsigned char c = IN1; 386 387 if (c == '~') { 388 unsigned char c2 = IN2; 389 390 REQUIRE_INBUF(2) 391 if (c2 == '~') { 392 WRITE1('~') 393 NEXT(2, 1) 394 continue; 395 } 396 else if (c2 == '{' && state->i == 0) 397 state->i = 1; /* set GB */ 398 else if (c2 == '}' && state->i == 1) 399 state->i = 0; /* set ASCII */ 400 else if (c2 == '\n') 401 ; /* line-continuation */ 402 else 403 return 2; 404 NEXT(2, 0); 405 continue; 406 } 407 408 if (c & 0x80) 409 return 1; 410 411 if (state->i == 0) { /* ASCII mode */ 412 WRITE1(c) 413 NEXT(1, 1) 414 } 415 else { /* GB mode */ 416 REQUIRE_INBUF(2) 417 REQUIRE_OUTBUF(1) 418 TRYMAP_DEC(gb2312, **outbuf, c, IN2) { 419 NEXT(2, 1) 420 } 421 else 422 return 2; 423 } 424 } 425 426 return 0; 427 427 } 428 428 -
python/vendor/current/Modules/cjkcodecs/_codecs_hk.c
r2 r388 19 19 CODEC_INIT(big5hkscs) 20 20 { 21 21 static int initialized = 0; 22 22 23 24 25 26 23 if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap)) 24 return -1; 25 initialized = 1; 26 return 0; 27 27 } 28 28 … … 39 39 ENCODER(big5hkscs) 40 40 { 41 42 43 44 41 while (inleft > 0) { 42 ucs4_t c = **inbuf; 43 DBCHAR code; 44 Py_ssize_t insize; 45 45 46 47 48 49 50 51 46 if (c < 0x80) { 47 REQUIRE_OUTBUF(1) 48 **outbuf = (unsigned char)c; 49 NEXT(1, 1) 50 continue; 51 } 52 52 53 54 53 DECODE_SURROGATE(c) 54 insize = GET_INSIZE(c); 55 55 56 56 REQUIRE_OUTBUF(2) 57 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 58 if (c < 0x10000) { 59 TRYMAP_ENC(big5hkscs_bmp, code, c) { 60 if (code == MULTIC) { 61 if (inleft >= 2 && 62 ((c & 0xffdf) == 0x00ca) && 63 (((*inbuf)[1] & 0xfff7) == 0x0304)) { 64 code = big5hkscs_pairenc_table[ 65 ((c >> 4) | 66 ((*inbuf)[1] >> 3)) & 3]; 67 insize = 2; 68 } 69 else if (inleft < 2 && 70 !(flags & MBENC_FLUSH)) 71 return MBERR_TOOFEW; 72 else { 73 if (c == 0xca) 74 code = 0x8866; 75 else /* c == 0xea */ 76 code = 0x88a7; 77 } 78 } 79 } 80 else TRYMAP_ENC(big5, code, c); 81 else return 1; 82 } 83 else if (c < 0x20000) 84 return insize; 85 else if (c < 0x30000) { 86 TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff); 87 else return insize; 88 } 89 else 90 return insize; 91 91 92 93 94 95 92 OUT1(code >> 8) 93 OUT2(code & 0xFF) 94 NEXT(insize, 2) 95 } 96 96 97 97 return 0; 98 98 } 99 99 … … 102 102 DECODER(big5hkscs) 103 103 { 104 105 106 104 while (inleft > 0) { 105 unsigned char c = IN1; 106 ucs4_t decoded; 107 107 108 108 REQUIRE_OUTBUF(1) 109 109 110 111 112 113 114 110 if (c < 0x80) { 111 OUT1(c) 112 NEXT(1, 1) 113 continue; 114 } 115 115 116 116 REQUIRE_INBUF(2) 117 117 118 if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1)) 119 goto hkscsdec; 118 if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) { 119 TRYMAP_DEC(big5, **outbuf, c, IN2) { 120 NEXT(2, 1) 121 continue; 122 } 123 } 120 124 121 TRYMAP_DEC(big5, **outbuf, c, IN2) { 122 NEXT(2, 1) 123 } 124 else 125 hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { 126 int s = BH2S(c, IN2); 127 const unsigned char *hintbase; 125 TRYMAP_DEC(big5hkscs, decoded, c, IN2) 126 { 127 int s = BH2S(c, IN2); 128 const unsigned char *hintbase; 128 129 129 130 130 assert(0x87 <= c && c <= 0xfe); 131 assert(0x40 <= IN2 && IN2 <= 0xfe); 131 132 132 133 134 135 136 137 138 139 140 141 142 143 144 145 133 if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { 134 hintbase = big5hkscs_phint_0; 135 s -= BH2S(0x87, 0x40); 136 } 137 else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ 138 hintbase = big5hkscs_phint_12130; 139 s -= BH2S(0xc6, 0xa1); 140 } 141 else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ 142 hintbase = big5hkscs_phint_21924; 143 s -= BH2S(0xf9, 0xd6); 144 } 145 else 146 return MBERR_INTERNAL; 146 147 147 if (hintbase[s >> 3] & (1 << (s & 7))) { 148 WRITEUCS4(decoded | 0x20000) 149 NEXT_IN(2) 150 } 151 else { 152 OUT1(decoded) 153 NEXT(2, 1) 154 } 155 } 156 else { 157 switch ((c << 8) | IN2) { 158 case 0x8862: WRITE2(0x00ca, 0x0304); break; 159 case 0x8864: WRITE2(0x00ca, 0x030c); break; 160 case 0x88a3: WRITE2(0x00ea, 0x0304); break; 161 case 0x88a5: WRITE2(0x00ea, 0x030c); break; 162 default: return 2; 163 } 148 if (hintbase[s >> 3] & (1 << (s & 7))) { 149 WRITEUCS4(decoded | 0x20000) 150 NEXT_IN(2) 151 } 152 else { 153 OUT1(decoded) 154 NEXT(2, 1) 155 } 156 continue; 157 } 164 158 165 NEXT(2, 2) /* all decoded codepoints are pairs, above. */ 166 } 167 } 159 switch ((c << 8) | IN2) { 160 case 0x8862: WRITE2(0x00ca, 0x0304); break; 161 case 0x8864: WRITE2(0x00ca, 0x030c); break; 162 case 0x88a3: WRITE2(0x00ea, 0x0304); break; 163 case 0x88a5: WRITE2(0x00ea, 0x030c); break; 164 default: return 2; 165 } 168 166 169 return 0; 167 NEXT(2, 2) /* all decoded codepoints are pairs, above. */ 168 } 169 170 return 0; 170 171 } 171 172 -
python/vendor/current/Modules/cjkcodecs/_codecs_iso2022.c
r2 r388 20 20 state->c[0-3] 21 21 22 23 24 25 22 00000000 23 ||^^^^^| 24 |+-----+---- G0-3 Character Set 25 +----------- Is G0-3 double byte? 26 26 27 27 state->c[4] 28 28 29 30 31 32 29 00000000 30 || 31 |+---- Locked-Shift? 32 +----- ESC Throughout 33 33 */ 34 34 35 #define ESC 36 #define SO 37 #define SI 38 #define LF 39 40 #define MAX_ESCSEQLEN 41 42 #define CHARSET_ISO8859_1 43 #define CHARSET_ASCII 44 #define CHARSET_ISO8859_7 45 #define CHARSET_JISX0201_K 46 #define CHARSET_JISX0201_R 47 48 #define CHARSET_GB2312 49 #define CHARSET_JISX0208 50 #define CHARSET_KSX1001 51 #define CHARSET_JISX0212 52 #define CHARSET_GB2312_8565 53 #define CHARSET_CNS11643_1 54 #define CHARSET_CNS11643_2 55 #define CHARSET_JISX0213_2000_1 56 #define CHARSET_JISX0213_2 57 #define CHARSET_JISX0213_2004_1 58 #define CHARSET_JISX0208_O 59 60 #define CHARSET_DBCS 61 #define ESCMARK(mark) 62 63 #define IS_ESCEND(c) 35 #define ESC 0x1B 36 #define SO 0x0E 37 #define SI 0x0F 38 #define LF 0x0A 39 40 #define MAX_ESCSEQLEN 16 41 42 #define CHARSET_ISO8859_1 'A' 43 #define CHARSET_ASCII 'B' 44 #define CHARSET_ISO8859_7 'F' 45 #define CHARSET_JISX0201_K 'I' 46 #define CHARSET_JISX0201_R 'J' 47 48 #define CHARSET_GB2312 ('A'|CHARSET_DBCS) 49 #define CHARSET_JISX0208 ('B'|CHARSET_DBCS) 50 #define CHARSET_KSX1001 ('C'|CHARSET_DBCS) 51 #define CHARSET_JISX0212 ('D'|CHARSET_DBCS) 52 #define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS) 53 #define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS) 54 #define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS) 55 #define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS) 56 #define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS) 57 #define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS) 58 #define CHARSET_JISX0208_O ('@'|CHARSET_DBCS) 59 60 #define CHARSET_DBCS 0x80 61 #define ESCMARK(mark) ((mark) & 0x7f) 62 63 #define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@') 64 64 #define IS_ISO2022ESC(c2) \ 65 66 67 68 69 70 #define MAP_UNMAPPABLE 71 #define MAP_MULTIPLE_AVAIL 72 73 #define F_SHIFTED 74 #define F_ESCTHROUGHOUT 75 76 #define STATE_SETG(dn, v) 77 #define STATE_GETG(dn) 78 79 #define STATE_G0 80 #define STATE_G1 81 #define STATE_G2 82 #define STATE_G3 83 #define STATE_SETG0(v) 84 #define STATE_SETG1(v) 85 #define STATE_SETG2(v) 86 #define STATE_SETG3(v) 87 88 #define STATE_SETFLAG(f) 89 #define STATE_GETFLAG(f) 90 #define STATE_CLEARFLAG(f) 91 #define STATE_CLEARFLAGS() 92 93 #define ISO2022_CONFIG 94 #define CONFIG_ISSET(flag) 95 #define CONFIG_DESIGNATIONS 65 ((c2) == '(' || (c2) == ')' || (c2) == '$' || \ 66 (c2) == '.' || (c2) == '&') 67 /* this is not a complete list of ISO-2022 escape sequence headers. 68 * but, it's enough to implement CJK instances of iso-2022. */ 69 70 #define MAP_UNMAPPABLE 0xFFFF 71 #define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */ 72 73 #define F_SHIFTED 0x01 74 #define F_ESCTHROUGHOUT 0x02 75 76 #define STATE_SETG(dn, v) ((state)->c[dn]) = (v); 77 #define STATE_GETG(dn) ((state)->c[dn]) 78 79 #define STATE_G0 STATE_GETG(0) 80 #define STATE_G1 STATE_GETG(1) 81 #define STATE_G2 STATE_GETG(2) 82 #define STATE_G3 STATE_GETG(3) 83 #define STATE_SETG0(v) STATE_SETG(0, v) 84 #define STATE_SETG1(v) STATE_SETG(1, v) 85 #define STATE_SETG2(v) STATE_SETG(2, v) 86 #define STATE_SETG3(v) STATE_SETG(3, v) 87 88 #define STATE_SETFLAG(f) ((state)->c[4]) |= (f); 89 #define STATE_GETFLAG(f) ((state)->c[4] & (f)) 90 #define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f); 91 #define STATE_CLEARFLAGS() ((state)->c[4]) = 0; 92 93 #define ISO2022_CONFIG ((const struct iso2022_config *)config) 94 #define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag)) 95 #define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations) 96 96 97 97 /* iso2022_config.flags */ 98 #define NO_SHIFT 99 #define USE_G2 100 #define USE_JISX0208_EXT 98 #define NO_SHIFT 0x01 99 #define USE_G2 0x02 100 #define USE_JISX0208_EXT 0x04 101 101 102 102 /*-*- internal data structures -*-*/ … … 107 107 108 108 struct iso2022_designation { 109 110 111 112 113 114 109 unsigned char mark; 110 unsigned char plane; 111 unsigned char width; 112 iso2022_init_func initializer; 113 iso2022_decode_func decoder; 114 iso2022_encode_func encoder; 115 115 }; 116 116 117 117 struct iso2022_config { 118 119 118 int flags; 119 const struct iso2022_designation *designations; /* non-ascii desigs */ 120 120 }; 121 121 … … 124 124 CODEC_INIT(iso2022) 125 125 { 126 127 128 129 130 126 const struct iso2022_designation *desig = CONFIG_DESIGNATIONS; 127 for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++) 128 if (desig->initializer != NULL && desig->initializer() != 0) 129 return -1; 130 return 0; 131 131 } 132 132 133 133 ENCODER_INIT(iso2022) 134 134 { 135 136 137 138 135 STATE_CLEARFLAGS() 136 STATE_SETG0(CHARSET_ASCII) 137 STATE_SETG1(CHARSET_ASCII) 138 return 0; 139 139 } 140 140 141 141 ENCODER_RESET(iso2022) 142 142 { 143 144 145 146 147 148 149 150 151 152 153 143 if (STATE_GETFLAG(F_SHIFTED)) { 144 WRITE1(SI) 145 NEXT_OUT(1) 146 STATE_CLEARFLAG(F_SHIFTED) 147 } 148 if (STATE_G0 != CHARSET_ASCII) { 149 WRITE3(ESC, '(', 'B') 150 NEXT_OUT(3) 151 STATE_SETG0(CHARSET_ASCII) 152 } 153 return 0; 154 154 } 155 155 156 156 ENCODER(iso2022) 157 157 { 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 158 while (inleft > 0) { 159 const struct iso2022_designation *dsg; 160 DBCHAR encoded; 161 ucs4_t c = **inbuf; 162 Py_ssize_t insize; 163 164 if (c < 0x80) { 165 if (STATE_G0 != CHARSET_ASCII) { 166 WRITE3(ESC, '(', 'B') 167 STATE_SETG0(CHARSET_ASCII) 168 NEXT_OUT(3) 169 } 170 if (STATE_GETFLAG(F_SHIFTED)) { 171 WRITE1(SI) 172 STATE_CLEARFLAG(F_SHIFTED) 173 NEXT_OUT(1) 174 } 175 WRITE1((unsigned char)c) 176 NEXT(1, 1) 177 continue; 178 } 179 180 DECODE_SURROGATE(c) 181 insize = GET_INSIZE(c); 182 183 encoded = MAP_UNMAPPABLE; 184 for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { 185 Py_ssize_t length = 1; 186 encoded = dsg->encoder(&c, &length); 187 if (encoded == MAP_MULTIPLE_AVAIL) { 188 /* this implementation won't work for pair 189 * of non-bmp characters. */ 190 if (inleft < 2) { 191 if (!(flags & MBENC_FLUSH)) 192 return MBERR_TOOFEW; 193 length = -1; 194 } 195 else 196 length = 2; 197 197 #if Py_UNICODE_SIZE == 2 198 199 200 201 202 203 204 198 if (length == 2) { 199 ucs4_t u4in[2]; 200 u4in[0] = (ucs4_t)IN1; 201 u4in[1] = (ucs4_t)IN2; 202 encoded = dsg->encoder(u4in, &length); 203 } else 204 encoded = dsg->encoder(&c, &length); 205 205 #else 206 206 encoded = dsg->encoder(&c, &length); 207 207 #endif 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 208 if (encoded != MAP_UNMAPPABLE) { 209 insize = length; 210 break; 211 } 212 } 213 else if (encoded != MAP_UNMAPPABLE) 214 break; 215 } 216 217 if (!dsg->mark) 218 return 1; 219 assert(dsg->width == 1 || dsg->width == 2); 220 221 switch (dsg->plane) { 222 case 0: /* G0 */ 223 if (STATE_GETFLAG(F_SHIFTED)) { 224 WRITE1(SI) 225 STATE_CLEARFLAG(F_SHIFTED) 226 NEXT_OUT(1) 227 } 228 if (STATE_G0 != dsg->mark) { 229 if (dsg->width == 1) { 230 WRITE3(ESC, '(', ESCMARK(dsg->mark)) 231 STATE_SETG0(dsg->mark) 232 NEXT_OUT(3) 233 } 234 else if (dsg->mark == CHARSET_JISX0208) { 235 WRITE3(ESC, '$', ESCMARK(dsg->mark)) 236 STATE_SETG0(dsg->mark) 237 NEXT_OUT(3) 238 } 239 else { 240 WRITE4(ESC, '$', '(', 241 ESCMARK(dsg->mark)) 242 STATE_SETG0(dsg->mark) 243 NEXT_OUT(4) 244 } 245 } 246 break; 247 case 1: /* G1 */ 248 if (STATE_G1 != dsg->mark) { 249 if (dsg->width == 1) { 250 WRITE3(ESC, ')', ESCMARK(dsg->mark)) 251 STATE_SETG1(dsg->mark) 252 NEXT_OUT(3) 253 } 254 else { 255 WRITE4(ESC, '$', ')', 256 ESCMARK(dsg->mark)) 257 STATE_SETG1(dsg->mark) 258 NEXT_OUT(4) 259 } 260 } 261 if (!STATE_GETFLAG(F_SHIFTED)) { 262 WRITE1(SO) 263 STATE_SETFLAG(F_SHIFTED) 264 NEXT_OUT(1) 265 } 266 break; 267 default: /* G2 and G3 is not supported: no encoding in 268 * CJKCodecs are using them yet */ 269 return MBERR_INTERNAL; 270 } 271 272 if (dsg->width == 1) { 273 WRITE1((unsigned char)encoded) 274 NEXT_OUT(1) 275 } 276 else { 277 WRITE2(encoded >> 8, encoded & 0xff) 278 NEXT_OUT(2) 279 } 280 NEXT_IN(insize) 281 } 282 283 return 0; 284 284 } 285 285 286 286 DECODER_INIT(iso2022) 287 287 { 288 289 290 291 292 288 STATE_CLEARFLAGS() 289 STATE_SETG0(CHARSET_ASCII) 290 STATE_SETG1(CHARSET_ASCII) 291 STATE_SETG2(CHARSET_ASCII) 292 return 0; 293 293 } 294 294 295 295 DECODER_RESET(iso2022) 296 296 { 297 298 299 297 STATE_SETG0(CHARSET_ASCII) 298 STATE_CLEARFLAG(F_SHIFTED) 299 return 0; 300 300 } 301 301 302 302 static Py_ssize_t 303 303 iso2022processesc(const void *config, MultibyteCodec_State *state, 304 305 { 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 } 378 379 #define ISO8859_7_DECODE(c, assi) 380 if ((c) < 0xa0) (assi) = (c);\381 else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0))))\382 (assi) = (c);\383 else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||\384 (0xbffffd77L & (1L << ((c)-0xb4)))))\385 (assi) = 0x02d0 + (c);\386 else if ((c) == 0xa1) (assi) = 0x2018;\387 else if ((c) == 0xa2) (assi) = 0x2019;\388 304 const unsigned char **inbuf, Py_ssize_t *inleft) 305 { 306 unsigned char charset, designation; 307 Py_ssize_t i, esclen; 308 309 for (i = 1;i < MAX_ESCSEQLEN;i++) { 310 if (i >= *inleft) 311 return MBERR_TOOFEW; 312 if (IS_ESCEND((*inbuf)[i])) { 313 esclen = i + 1; 314 break; 315 } 316 else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft && 317 (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') 318 i += 2; 319 } 320 321 if (i >= MAX_ESCSEQLEN) 322 return 1; /* unterminated escape sequence */ 323 324 switch (esclen) { 325 case 3: 326 if (IN2 == '$') { 327 charset = IN3 | CHARSET_DBCS; 328 designation = 0; 329 } 330 else { 331 charset = IN3; 332 if (IN2 == '(') designation = 0; 333 else if (IN2 == ')') designation = 1; 334 else if (CONFIG_ISSET(USE_G2) && IN2 == '.') 335 designation = 2; 336 else return 3; 337 } 338 break; 339 case 4: 340 if (IN2 != '$') 341 return 4; 342 343 charset = IN4 | CHARSET_DBCS; 344 if (IN3 == '(') designation = 0; 345 else if (IN3 == ')') designation = 1; 346 else return 4; 347 break; 348 case 6: /* designation with prefix */ 349 if (CONFIG_ISSET(USE_JISX0208_EXT) && 350 (*inbuf)[3] == ESC && (*inbuf)[4] == '$' && 351 (*inbuf)[5] == 'B') { 352 charset = 'B' | CHARSET_DBCS; 353 designation = 0; 354 } 355 else 356 return 6; 357 break; 358 default: 359 return esclen; 360 } 361 362 /* raise error when the charset is not designated for this encoding */ 363 if (charset != CHARSET_ASCII) { 364 const struct iso2022_designation *dsg; 365 366 for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) 367 if (dsg->mark == charset) 368 break; 369 if (!dsg->mark) 370 return esclen; 371 } 372 373 STATE_SETG(designation, charset) 374 *inleft -= esclen; 375 (*inbuf) += esclen; 376 return 0; 377 } 378 379 #define ISO8859_7_DECODE(c, assi) \ 380 if ((c) < 0xa0) (assi) = (c); \ 381 else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \ 382 (assi) = (c); \ 383 else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \ 384 (0xbffffd77L & (1L << ((c)-0xb4))))) \ 385 (assi) = 0x02d0 + (c); \ 386 else if ((c) == 0xa1) (assi) = 0x2018; \ 387 else if ((c) == 0xa2) (assi) = 0x2019; \ 388 else if ((c) == 0xaf) (assi) = 0x2015; 389 389 390 390 static Py_ssize_t 391 391 iso2022processg2(const void *config, MultibyteCodec_State *state, 392 393 394 { 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 392 const unsigned char **inbuf, Py_ssize_t *inleft, 393 Py_UNICODE **outbuf, Py_ssize_t *outleft) 394 { 395 /* not written to use encoder, decoder functions because only few 396 * encodings use G2 designations in CJKCodecs */ 397 if (STATE_G2 == CHARSET_ISO8859_1) { 398 if (IN3 < 0x80) 399 OUT1(IN3 + 0x80) 400 else 401 return 3; 402 } 403 else if (STATE_G2 == CHARSET_ISO8859_7) { 404 ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) 405 else return 3; 406 } 407 else if (STATE_G2 == CHARSET_ASCII) { 408 if (IN3 & 0x80) return 3; 409 else **outbuf = IN3; 410 } 411 else 412 return MBERR_INTERNAL; 413 414 (*inbuf) += 3; 415 *inleft -= 3; 416 (*outbuf) += 1; 417 *outleft -= 1; 418 return 0; 419 419 } 420 420 421 421 DECODER(iso2022) 422 422 { 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 bypass: 496 497 498 499 500 501 502 503 504 505 423 const struct iso2022_designation *dsgcache = NULL; 424 425 while (inleft > 0) { 426 unsigned char c = IN1; 427 Py_ssize_t err; 428 429 if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { 430 /* ESC throughout mode: 431 * for non-iso2022 escape sequences */ 432 WRITE1(c) /* assume as ISO-8859-1 */ 433 NEXT(1, 1) 434 if (IS_ESCEND(c)) { 435 STATE_CLEARFLAG(F_ESCTHROUGHOUT) 436 } 437 continue; 438 } 439 440 switch (c) { 441 case ESC: 442 REQUIRE_INBUF(2) 443 if (IS_ISO2022ESC(IN2)) { 444 err = iso2022processesc(config, state, 445 inbuf, &inleft); 446 if (err != 0) 447 return err; 448 } 449 else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ 450 REQUIRE_INBUF(3) 451 err = iso2022processg2(config, state, 452 inbuf, &inleft, outbuf, &outleft); 453 if (err != 0) 454 return err; 455 } 456 else { 457 WRITE1(ESC) 458 STATE_SETFLAG(F_ESCTHROUGHOUT) 459 NEXT(1, 1) 460 } 461 break; 462 case SI: 463 if (CONFIG_ISSET(NO_SHIFT)) 464 goto bypass; 465 STATE_CLEARFLAG(F_SHIFTED) 466 NEXT_IN(1) 467 break; 468 case SO: 469 if (CONFIG_ISSET(NO_SHIFT)) 470 goto bypass; 471 STATE_SETFLAG(F_SHIFTED) 472 NEXT_IN(1) 473 break; 474 case LF: 475 STATE_CLEARFLAG(F_SHIFTED) 476 WRITE1(LF) 477 NEXT(1, 1) 478 break; 479 default: 480 if (c < 0x20) /* C0 */ 481 goto bypass; 482 else if (c >= 0x80) 483 return 1; 484 else { 485 const struct iso2022_designation *dsg; 486 unsigned char charset; 487 ucs4_t decoded; 488 489 if (STATE_GETFLAG(F_SHIFTED)) 490 charset = STATE_G1; 491 else 492 charset = STATE_G0; 493 494 if (charset == CHARSET_ASCII) { 495 bypass: WRITE1(c) 496 NEXT(1, 1) 497 break; 498 } 499 500 if (dsgcache != NULL && 501 dsgcache->mark == charset) 502 dsg = dsgcache; 503 else { 504 for (dsg = CONFIG_DESIGNATIONS; 505 dsg->mark != charset 506 506 #ifdef Py_DEBUG 507 507 && dsg->mark != '\0' 508 508 #endif 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 509 ;dsg++) 510 /* noop */; 511 assert(dsg->mark != '\0'); 512 dsgcache = dsg; 513 } 514 515 REQUIRE_INBUF(dsg->width) 516 decoded = dsg->decoder(*inbuf); 517 if (decoded == MAP_UNMAPPABLE) 518 return dsg->width; 519 520 if (decoded < 0x10000) { 521 WRITE1(decoded) 522 NEXT_OUT(1) 523 } 524 else if (decoded < 0x30000) { 525 WRITEUCS4(decoded) 526 } 527 else { /* JIS X 0213 pairs */ 528 WRITE2(decoded >> 16, decoded & 0xffff) 529 NEXT_OUT(2) 530 } 531 NEXT_IN(dsg->width) 532 } 533 break; 534 } 535 } 536 return 0; 537 537 } 538 538 … … 568 568 ksx1001_init(void) 569 569 { 570 571 572 573 574 575 576 577 570 static int initialized = 0; 571 572 if (!initialized && ( 573 IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) || 574 IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap))) 575 return -1; 576 initialized = 1; 577 return 0; 578 578 } 579 579 … … 581 581 ksx1001_decoder(const unsigned char *data) 582 582 { 583 584 585 586 587 583 ucs4_t u; 584 TRYMAP_DEC(ksx1001, u, data[0], data[1]) 585 return u; 586 else 587 return MAP_UNMAPPABLE; 588 588 } 589 589 … … 591 591 ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) 592 592 { 593 594 595 596 597 598 599 600 593 DBCHAR coded; 594 assert(*length == 1); 595 if (*data < 0x10000) { 596 TRYMAP_ENC(cp949, coded, *data) 597 if (!(coded & 0x8000)) 598 return coded; 599 } 600 return MAP_UNMAPPABLE; 601 601 } 602 602 … … 604 604 jisx0208_init(void) 605 605 { 606 607 608 609 610 611 612 613 606 static int initialized = 0; 607 608 if (!initialized && ( 609 IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || 610 IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap))) 611 return -1; 612 initialized = 1; 613 return 0; 614 614 } 615 615 … … 617 617 jisx0208_decoder(const unsigned char *data) 618 618 { 619 620 621 622 623 624 625 619 ucs4_t u; 620 if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ 621 return 0xff3c; 622 else TRYMAP_DEC(jisx0208, u, data[0], data[1]) 623 return u; 624 else 625 return MAP_UNMAPPABLE; 626 626 } 627 627 … … 629 629 jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) 630 630 { 631 632 633 634 635 636 637 638 639 640 641 631 DBCHAR coded; 632 assert(*length == 1); 633 if (*data < 0x10000) { 634 if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ 635 return 0x2140; 636 else TRYMAP_ENC(jisxcommon, coded, *data) { 637 if (!(coded & 0x8000)) 638 return coded; 639 } 640 } 641 return MAP_UNMAPPABLE; 642 642 } 643 643 … … 645 645 jisx0212_init(void) 646 646 { 647 648 649 650 651 652 653 654 647 static int initialized = 0; 648 649 if (!initialized && ( 650 IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || 651 IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap))) 652 return -1; 653 initialized = 1; 654 return 0; 655 655 } 656 656 … … 658 658 jisx0212_decoder(const unsigned char *data) 659 659 { 660 661 662 663 664 660 ucs4_t u; 661 TRYMAP_DEC(jisx0212, u, data[0], data[1]) 662 return u; 663 else 664 return MAP_UNMAPPABLE; 665 665 } 666 666 … … 668 668 jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) 669 669 { 670 671 672 673 674 675 676 677 678 670 DBCHAR coded; 671 assert(*length == 1); 672 if (*data < 0x10000) { 673 TRYMAP_ENC(jisxcommon, coded, *data) { 674 if (coded & 0x8000) 675 return coded & 0x7fff; 676 } 677 } 678 return MAP_UNMAPPABLE; 679 679 } 680 680 … … 682 682 jisx0213_init(void) 683 683 { 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 684 static int initialized = 0; 685 686 if (!initialized && ( 687 jisx0208_init() || 688 IMPORT_MAP(jp, jisx0213_bmp, 689 &jisx0213_bmp_encmap, NULL) || 690 IMPORT_MAP(jp, jisx0213_1_bmp, 691 NULL, &jisx0213_1_bmp_decmap) || 692 IMPORT_MAP(jp, jisx0213_2_bmp, 693 NULL, &jisx0213_2_bmp_decmap) || 694 IMPORT_MAP(jp, jisx0213_emp, 695 &jisx0213_emp_encmap, NULL) || 696 IMPORT_MAP(jp, jisx0213_1_emp, 697 NULL, &jisx0213_1_emp_decmap) || 698 IMPORT_MAP(jp, jisx0213_2_emp, 699 NULL, &jisx0213_2_emp_decmap) || 700 IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap, 701 &jisx0213_pair_decmap))) 702 return -1; 703 initialized = 1; 704 return 0; 705 705 } 706 706 … … 709 709 jisx0213_2000_1_decoder(const unsigned char *data) 710 710 { 711 712 713 714 715 716 717 718 719 720 721 722 711 ucs4_t u; 712 EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1]) 713 else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ 714 return 0xff3c; 715 else TRYMAP_DEC(jisx0208, u, data[0], data[1]); 716 else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); 717 else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) 718 u |= 0x20000; 719 else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); 720 else 721 return MAP_UNMAPPABLE; 722 return u; 723 723 } 724 724 … … 726 726 jisx0213_2000_2_decoder(const unsigned char *data) 727 727 { 728 729 730 731 732 733 734 735 728 ucs4_t u; 729 EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1]) 730 TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); 731 else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) 732 u |= 0x20000; 733 else 734 return MAP_UNMAPPABLE; 735 return u; 736 736 } 737 737 #undef config … … 740 740 jisx0213_2004_1_decoder(const unsigned char *data) 741 741 { 742 743 744 745 746 747 748 749 750 751 752 742 ucs4_t u; 743 if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ 744 return 0xff3c; 745 else TRYMAP_DEC(jisx0208, u, data[0], data[1]); 746 else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]); 747 else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]) 748 u |= 0x20000; 749 else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]); 750 else 751 return MAP_UNMAPPABLE; 752 return u; 753 753 } 754 754 … … 756 756 jisx0213_2004_2_decoder(const unsigned char *data) 757 757 { 758 759 760 761 762 763 764 758 ucs4_t u; 759 TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]); 760 else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]) 761 u |= 0x20000; 762 else 763 return MAP_UNMAPPABLE; 764 return u; 765 765 } 766 766 … … 768 768 jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) 769 769 { 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 770 DBCHAR coded; 771 772 switch (*length) { 773 case 1: /* first character */ 774 if (*data >= 0x10000) { 775 if ((*data) >> 16 == 0x20000 >> 16) { 776 EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data) 777 else TRYMAP_ENC(jisx0213_emp, coded, 778 (*data) & 0xffff) 779 return coded; 780 } 781 return MAP_UNMAPPABLE; 782 } 783 784 EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data) 785 else TRYMAP_ENC(jisx0213_bmp, coded, *data) { 786 if (coded == MULTIC) 787 return MAP_MULTIPLE_AVAIL; 788 } 789 else TRYMAP_ENC(jisxcommon, coded, *data) { 790 if (coded & 0x8000) 791 return MAP_UNMAPPABLE; 792 } 793 else 794 return MAP_UNMAPPABLE; 795 return coded; 796 case 2: /* second character of unicode pair */ 797 coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], 798 jisx0213_pair_encmap, JISX0213_ENCPAIRS); 799 if (coded == DBCINV) { 800 *length = 1; 801 coded = find_pairencmap((ucs2_t)data[0], 0, 802 jisx0213_pair_encmap, JISX0213_ENCPAIRS); 803 if (coded == DBCINV) 804 return MAP_UNMAPPABLE; 805 } 806 else 807 return coded; 808 case -1: /* flush unterminated */ 809 *length = 1; 810 coded = find_pairencmap((ucs2_t)data[0], 0, 811 jisx0213_pair_encmap, JISX0213_ENCPAIRS); 812 if (coded == DBCINV) 813 return MAP_UNMAPPABLE; 814 else 815 return coded; 816 default: 817 return MAP_UNMAPPABLE; 818 } 819 819 } 820 820 … … 822 822 jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) 823 823 { 824 825 826 827 828 829 830 824 DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); 825 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) 826 return coded; 827 else if (coded & 0x8000) 828 return MAP_UNMAPPABLE; 829 else 830 return coded; 831 831 } 832 832 … … 834 834 jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) 835 835 { 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 836 DBCHAR coded; 837 Py_ssize_t ilength = *length; 838 839 coded = jisx0213_encoder(data, length, (void *)2000); 840 switch (ilength) { 841 case 1: 842 if (coded == MAP_MULTIPLE_AVAIL) 843 return MAP_MULTIPLE_AVAIL; 844 else 845 return MAP_UNMAPPABLE; 846 case 2: 847 if (*length != 2) 848 return MAP_UNMAPPABLE; 849 else 850 return coded; 851 default: 852 return MAP_UNMAPPABLE; 853 } 854 854 } 855 855 … … 857 857 jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) 858 858 { 859 860 861 862 863 864 865 859 DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); 860 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) 861 return coded; 862 else if (coded & 0x8000) 863 return coded & 0x7fff; 864 else 865 return MAP_UNMAPPABLE; 866 866 } 867 867 … … 869 869 jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) 870 870 { 871 872 873 874 875 876 877 871 DBCHAR coded = jisx0213_encoder(data, length, NULL); 872 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) 873 return coded; 874 else if (coded & 0x8000) 875 return MAP_UNMAPPABLE; 876 else 877 return coded; 878 878 } 879 879 … … 881 881 jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) 882 882 { 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 883 DBCHAR coded; 884 Py_ssize_t ilength = *length; 885 886 coded = jisx0213_encoder(data, length, NULL); 887 switch (ilength) { 888 case 1: 889 if (coded == MAP_MULTIPLE_AVAIL) 890 return MAP_MULTIPLE_AVAIL; 891 else 892 return MAP_UNMAPPABLE; 893 case 2: 894 if (*length != 2) 895 return MAP_UNMAPPABLE; 896 else 897 return coded; 898 default: 899 return MAP_UNMAPPABLE; 900 } 901 901 } 902 902 … … 904 904 jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) 905 905 { 906 907 908 909 910 911 912 906 DBCHAR coded = jisx0213_encoder(data, length, NULL); 907 if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) 908 return coded; 909 else if (coded & 0x8000) 910 return coded & 0x7fff; 911 else 912 return MAP_UNMAPPABLE; 913 913 } 914 914 … … 916 916 jisx0201_r_decoder(const unsigned char *data) 917 917 { 918 919 920 921 918 ucs4_t u; 919 JISX0201_R_DECODE(*data, u) 920 else return MAP_UNMAPPABLE; 921 return u; 922 922 } 923 923 … … 925 925 jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) 926 926 { 927 928 929 930 927 DBCHAR coded; 928 JISX0201_R_ENCODE(*data, coded) 929 else return MAP_UNMAPPABLE; 930 return coded; 931 931 } 932 932 … … 934 934 jisx0201_k_decoder(const unsigned char *data) 935 935 { 936 937 938 939 936 ucs4_t u; 937 JISX0201_K_DECODE(*data ^ 0x80, u) 938 else return MAP_UNMAPPABLE; 939 return u; 940 940 } 941 941 … … 943 943 jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length) 944 944 { 945 946 947 948 945 DBCHAR coded; 946 JISX0201_K_ENCODE(*data, coded) 947 else return MAP_UNMAPPABLE; 948 return coded - 0x80; 949 949 } 950 950 … … 952 952 gb2312_init(void) 953 953 { 954 955 956 957 958 959 960 961 954 static int initialized = 0; 955 956 if (!initialized && ( 957 IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) || 958 IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap))) 959 return -1; 960 initialized = 1; 961 return 0; 962 962 } 963 963 … … 965 965 gb2312_decoder(const unsigned char *data) 966 966 { 967 968 969 970 971 967 ucs4_t u; 968 TRYMAP_DEC(gb2312, u, data[0], data[1]) 969 return u; 970 else 971 return MAP_UNMAPPABLE; 972 972 } 973 973 … … 975 975 gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) 976 976 { 977 978 979 980 981 982 983 984 985 977 DBCHAR coded; 978 assert(*length == 1); 979 if (*data < 0x10000) { 980 TRYMAP_ENC(gbcommon, coded, *data) { 981 if (!(coded & 0x8000)) 982 return coded; 983 } 984 } 985 return MAP_UNMAPPABLE; 986 986 } 987 987 … … 990 990 dummy_decoder(const unsigned char *data) 991 991 { 992 992 return MAP_UNMAPPABLE; 993 993 } 994 994 … … 996 996 dummy_encoder(const ucs4_t *data, Py_ssize_t *length) 997 997 { 998 998 return MAP_UNMAPPABLE; 999 999 } 1000 1000 1001 1001 /*-*- registry tables -*-*/ 1002 1002 1003 #define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2,\1004 ksx1001_init,\1005 1006 #define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2,\1007 ksx1001_init,\1008 1009 #define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1,\1010 NULL,\1011 1012 #define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1,\1013 NULL,\1014 1015 #define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2,\1016 jisx0208_init,\1017 1018 #define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2,\1019 jisx0208_init,\1020 1021 #define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2,\1022 jisx0212_init,\1023 1024 #define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, 1025 jisx0213_init,\1026 jisx0213_2000_1_decoder,\1027 1003 #define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \ 1004 ksx1001_init, \ 1005 ksx1001_decoder, ksx1001_encoder } 1006 #define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \ 1007 ksx1001_init, \ 1008 ksx1001_decoder, ksx1001_encoder } 1009 #define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \ 1010 NULL, \ 1011 jisx0201_r_decoder, jisx0201_r_encoder } 1012 #define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \ 1013 NULL, \ 1014 jisx0201_k_decoder, jisx0201_k_encoder } 1015 #define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \ 1016 jisx0208_init, \ 1017 jisx0208_decoder, jisx0208_encoder } 1018 #define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \ 1019 jisx0208_init, \ 1020 jisx0208_decoder, jisx0208_encoder } 1021 #define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \ 1022 jisx0212_init, \ 1023 jisx0212_decoder, jisx0212_encoder } 1024 #define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \ 1025 jisx0213_init, \ 1026 jisx0213_2000_1_decoder, \ 1027 jisx0213_2000_1_encoder } 1028 1028 #define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \ 1029 jisx0213_init,\1030 jisx0213_2000_1_decoder,\1031 1032 #define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, 1033 jisx0213_init,\1034 jisx0213_2000_2_decoder,\1035 1036 #define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, 1037 jisx0213_init,\1038 jisx0213_2004_1_decoder,\1039 1029 jisx0213_init, \ 1030 jisx0213_2000_1_decoder, \ 1031 jisx0213_2000_1_encoder_paironly } 1032 #define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \ 1033 jisx0213_init, \ 1034 jisx0213_2000_2_decoder, \ 1035 jisx0213_2000_2_encoder } 1036 #define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \ 1037 jisx0213_init, \ 1038 jisx0213_2004_1_decoder, \ 1039 jisx0213_2004_1_encoder } 1040 1040 #define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \ 1041 jisx0213_init,\1042 jisx0213_2004_1_decoder,\1043 1044 #define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, 1045 jisx0213_init,\1046 jisx0213_2004_2_decoder,\1047 1048 #define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2,\1049 gb2312_init,\1050 1051 #define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2,\1052 cns11643_init,\1053 1054 #define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2,\1055 cns11643_init,\1056 1057 #define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1,\1058 1059 #define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1,\1060 1061 #define REGISTRY_SENTINEL 1062 #define CONFIGDEF(var, attrs) 1063 static const struct iso2022_config iso2022_##var##_config = {\1064 attrs, iso2022_##var##_designations\1065 1041 jisx0213_init, \ 1042 jisx0213_2004_1_decoder, \ 1043 jisx0213_2004_1_encoder_paironly } 1044 #define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \ 1045 jisx0213_init, \ 1046 jisx0213_2004_2_decoder, \ 1047 jisx0213_2004_2_encoder } 1048 #define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \ 1049 gb2312_init, \ 1050 gb2312_decoder, gb2312_encoder } 1051 #define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \ 1052 cns11643_init, \ 1053 cns11643_1_decoder, cns11643_1_encoder } 1054 #define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \ 1055 cns11643_init, \ 1056 cns11643_2_decoder, cns11643_2_encoder } 1057 #define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \ 1058 NULL, dummy_decoder, dummy_encoder } 1059 #define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \ 1060 NULL, dummy_decoder, dummy_encoder } 1061 #define REGISTRY_SENTINEL { 0, } 1062 #define CONFIGDEF(var, attrs) \ 1063 static const struct iso2022_config iso2022_##var##_config = { \ 1064 attrs, iso2022_##var##_designations \ 1065 }; 1066 1066 1067 1067 static const struct iso2022_designation iso2022_kr_designations[] = { 1068 1068 REGISTRY_KSX1001_G1, REGISTRY_SENTINEL 1069 1069 }; 1070 1070 CONFIGDEF(kr, 0) 1071 1071 1072 1072 static const struct iso2022_designation iso2022_jp_designations[] = { 1073 1074 1073 REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O, 1074 REGISTRY_SENTINEL 1075 1075 }; 1076 1076 CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT) 1077 1077 1078 1078 static const struct iso2022_designation iso2022_jp_1_designations[] = { 1079 1080 1079 REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R, 1080 REGISTRY_JISX0208_O, REGISTRY_SENTINEL 1081 1081 }; 1082 1082 CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT) 1083 1083 1084 1084 static const struct iso2022_designation iso2022_jp_2_designations[] = { 1085 1086 1087 1085 REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0, 1086 REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O, 1087 REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL 1088 1088 }; 1089 1089 CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT) 1090 1090 1091 1091 static const struct iso2022_designation iso2022_jp_2004_designations[] = { 1092 1093 1092 REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208, 1093 REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL 1094 1094 }; 1095 1095 CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT) 1096 1096 1097 1097 static const struct iso2022_designation iso2022_jp_3_designations[] = { 1098 1099 1098 REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208, 1099 REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL 1100 1100 }; 1101 1101 CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT) 1102 1102 1103 1103 static const struct iso2022_designation iso2022_jp_ext_designations[] = { 1104 1105 1104 REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R, 1105 REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL 1106 1106 }; 1107 1107 CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT) … … 1112 1112 END_MAPPINGS_LIST 1113 1113 1114 #define ISO2022_CODEC(variation) { 1115 "iso2022_" #variation,\1116 &iso2022_##variation##_config,\1117 iso2022_codec_init,\1118 _STATEFUL_METHODS(iso2022)\1114 #define ISO2022_CODEC(variation) { \ 1115 "iso2022_" #variation, \ 1116 &iso2022_##variation##_config, \ 1117 iso2022_codec_init, \ 1118 _STATEFUL_METHODS(iso2022) \ 1119 1119 }, 1120 1120 -
python/vendor/current/Modules/cjkcodecs/_codecs_jp.c
r2 r388 20 20 ENCODER(cp932) 21 21 { 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 22 while (inleft > 0) { 23 Py_UNICODE c = IN1; 24 DBCHAR code; 25 unsigned char c1, c2; 26 27 if (c <= 0x80) { 28 WRITE1((unsigned char)c) 29 NEXT(1, 1) 30 continue; 31 } 32 else if (c >= 0xff61 && c <= 0xff9f) { 33 WRITE1(c - 0xfec0) 34 NEXT(1, 1) 35 continue; 36 } 37 else if (c >= 0xf8f0 && c <= 0xf8f3) { 38 /* Windows compatibility */ 39 REQUIRE_OUTBUF(1) 40 if (c == 0xf8f0) 41 OUT1(0xa0) 42 else 43 OUT1(c - 0xfef1 + 0xfd) 44 NEXT(1, 1) 45 continue; 46 } 47 48 UCS4INVALID(c) 49 REQUIRE_OUTBUF(2) 50 51 TRYMAP_ENC(cp932ext, code, c) { 52 OUT1(code >> 8) 53 OUT2(code & 0xff) 54 } 55 else TRYMAP_ENC(jisxcommon, code, c) { 56 if (code & 0x8000) /* MSB set: JIS X 0212 */ 57 return 1; 58 59 /* JIS X 0208 */ 60 c1 = code >> 8; 61 c2 = code & 0xff; 62 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); 63 c1 = (c1 - 0x21) >> 1; 64 OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) 65 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) 66 } 67 else if (c >= 0xe000 && c < 0xe758) { 68 /* User-defined area */ 69 c1 = (Py_UNICODE)(c - 0xe000) / 188; 70 c2 = (Py_UNICODE)(c - 0xe000) % 188; 71 OUT1(c1 + 0xf0) 72 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) 73 } 74 else 75 return 1; 76 77 NEXT(1, 2) 78 } 79 80 return 0; 81 81 } 82 82 83 83 DECODER(cp932) 84 84 { 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 85 while (inleft > 0) { 86 unsigned char c = IN1, c2; 87 88 REQUIRE_OUTBUF(1) 89 if (c <= 0x80) { 90 OUT1(c) 91 NEXT(1, 1) 92 continue; 93 } 94 else if (c >= 0xa0 && c <= 0xdf) { 95 if (c == 0xa0) 96 OUT1(0xf8f0) /* half-width katakana */ 97 else 98 OUT1(0xfec0 + c) 99 NEXT(1, 1) 100 continue; 101 } 102 else if (c >= 0xfd/* && c <= 0xff*/) { 103 /* Windows compatibility */ 104 OUT1(0xf8f1 - 0xfd + c) 105 NEXT(1, 1) 106 continue; 107 } 108 109 REQUIRE_INBUF(2) 110 c2 = IN2; 111 112 TRYMAP_DEC(cp932ext, **outbuf, c, c2); 113 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ 114 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) 115 return 2; 116 117 c = (c < 0xe0 ? c - 0x81 : c - 0xc1); 118 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); 119 c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21); 120 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; 121 122 TRYMAP_DEC(jisx0208, **outbuf, c, c2); 123 else return 2; 124 } 125 else if (c >= 0xf0 && c <= 0xf9) { 126 if ((c2 >= 0x40 && c2 <= 0x7e) || 127 (c2 >= 0x80 && c2 <= 0xfc)) 128 OUT1(0xe000 + 188 * (c - 0xf0) + 129 (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) 130 else 131 return 2; 132 } 133 else 134 return 2; 135 136 NEXT(2, 1) 137 } 138 139 return 0; 140 140 } 141 141 … … 147 147 ENCODER(euc_jis_2004) 148 148 { 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 149 while (inleft > 0) { 150 ucs4_t c = IN1; 151 DBCHAR code; 152 Py_ssize_t insize; 153 154 if (c < 0x80) { 155 WRITE1(c) 156 NEXT(1, 1) 157 continue; 158 } 159 160 DECODE_SURROGATE(c) 161 insize = GET_INSIZE(c); 162 163 if (c <= 0xFFFF) { 164 EMULATE_JISX0213_2000_ENCODE_BMP(code, c) 165 else TRYMAP_ENC(jisx0213_bmp, code, c) { 166 if (code == MULTIC) { 167 if (inleft < 2) { 168 if (flags & MBENC_FLUSH) { 169 code = find_pairencmap( 170 (ucs2_t)c, 0, 171 jisx0213_pair_encmap, 172 JISX0213_ENCPAIRS); 173 if (code == DBCINV) 174 return 1; 175 } 176 else 177 return MBERR_TOOFEW; 178 } 179 else { 180 code = find_pairencmap( 181 (ucs2_t)c, (*inbuf)[1], 182 jisx0213_pair_encmap, 183 JISX0213_ENCPAIRS); 184 if (code == DBCINV) { 185 code = find_pairencmap( 186 (ucs2_t)c, 0, 187 jisx0213_pair_encmap, 188 JISX0213_ENCPAIRS); 189 if (code == DBCINV) 190 return 1; 191 } else 192 insize = 2; 193 } 194 } 195 } 196 else TRYMAP_ENC(jisxcommon, code, c); 197 else if (c >= 0xff61 && c <= 0xff9f) { 198 /* JIS X 0201 half-width katakana */ 199 WRITE2(0x8e, c - 0xfec0) 200 NEXT(1, 2) 201 continue; 202 } 203 else if (c == 0xff3c) 204 /* F/W REVERSE SOLIDUS (see NOTES) */ 205 code = 0x2140; 206 else if (c == 0xff5e) 207 /* F/W TILDE (see NOTES) */ 208 code = 0x2232; 209 else 210 return 1; 211 } 212 else if (c >> 16 == EMPBASE >> 16) { 213 EMULATE_JISX0213_2000_ENCODE_EMP(code, c) 214 else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff); 215 else return insize; 216 } 217 else 218 return insize; 219 220 if (code & 0x8000) { 221 /* Codeset 2 */ 222 WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) 223 NEXT(insize, 3) 224 } else { 225 /* Codeset 1 */ 226 WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) 227 NEXT(insize, 2) 228 } 229 } 230 231 return 0; 232 232 } 233 233 234 234 DECODER(euc_jis_2004) 235 235 { 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 236 while (inleft > 0) { 237 unsigned char c = IN1; 238 ucs4_t code; 239 240 REQUIRE_OUTBUF(1) 241 242 if (c < 0x80) { 243 OUT1(c) 244 NEXT(1, 1) 245 continue; 246 } 247 248 if (c == 0x8e) { 249 /* JIS X 0201 half-width katakana */ 250 unsigned char c2; 251 252 REQUIRE_INBUF(2) 253 c2 = IN2; 254 if (c2 >= 0xa1 && c2 <= 0xdf) { 255 OUT1(0xfec0 + c2) 256 NEXT(2, 1) 257 } 258 else 259 return 2; 260 } 261 else if (c == 0x8f) { 262 unsigned char c2, c3; 263 264 REQUIRE_INBUF(3) 265 c2 = IN2 ^ 0x80; 266 c3 = IN3 ^ 0x80; 267 268 /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */ 269 EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3) 270 else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ; 271 else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) { 272 WRITEUCS4(EMPBASE | code) 273 NEXT_IN(3) 274 continue; 275 } 276 else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; 277 else return 3; 278 NEXT(3, 1) 279 } 280 else { 281 unsigned char c2; 282 283 REQUIRE_INBUF(2) 284 c ^= 0x80; 285 c2 = IN2 ^ 0x80; 286 287 /* JIS X 0213 Plane 1 */ 288 EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2) 289 else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; 290 else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e; 291 else TRYMAP_DEC(jisx0208, **outbuf, c, c2); 292 else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); 293 else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { 294 WRITEUCS4(EMPBASE | code) 295 NEXT_IN(2) 296 continue; 297 } 298 else TRYMAP_DEC(jisx0213_pair, code, c, c2) { 299 WRITE2(code >> 16, code & 0xffff) 300 NEXT(2, 2) 301 continue; 302 } 303 else return 2; 304 NEXT(2, 1) 305 } 306 } 307 308 return 0; 309 309 } 310 310 … … 316 316 ENCODER(euc_jp) 317 317 { 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 318 while (inleft > 0) { 319 Py_UNICODE c = IN1; 320 DBCHAR code; 321 322 if (c < 0x80) { 323 WRITE1((unsigned char)c) 324 NEXT(1, 1) 325 continue; 326 } 327 328 UCS4INVALID(c) 329 330 TRYMAP_ENC(jisxcommon, code, c); 331 else if (c >= 0xff61 && c <= 0xff9f) { 332 /* JIS X 0201 half-width katakana */ 333 WRITE2(0x8e, c - 0xfec0) 334 NEXT(1, 2) 335 continue; 336 } 337 337 #ifndef STRICT_BUILD 338 339 340 341 342 343 344 345 346 347 348 338 else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */ 339 code = 0x2140; 340 else if (c == 0xa5) { /* YEN SIGN */ 341 WRITE1(0x5c); 342 NEXT(1, 1) 343 continue; 344 } else if (c == 0x203e) { /* OVERLINE */ 345 WRITE1(0x7e); 346 NEXT(1, 1) 347 continue; 348 } 349 349 #endif 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 350 else 351 return 1; 352 353 if (code & 0x8000) { 354 /* JIS X 0212 */ 355 WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) 356 NEXT(1, 3) 357 } else { 358 /* JIS X 0208 */ 359 WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) 360 NEXT(1, 2) 361 } 362 } 363 364 return 0; 365 365 } 366 366 367 367 DECODER(euc_jp) 368 368 { 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 369 while (inleft > 0) { 370 unsigned char c = IN1; 371 372 REQUIRE_OUTBUF(1) 373 374 if (c < 0x80) { 375 OUT1(c) 376 NEXT(1, 1) 377 continue; 378 } 379 380 if (c == 0x8e) { 381 /* JIS X 0201 half-width katakana */ 382 unsigned char c2; 383 384 REQUIRE_INBUF(2) 385 c2 = IN2; 386 if (c2 >= 0xa1 && c2 <= 0xdf) { 387 OUT1(0xfec0 + c2) 388 NEXT(2, 1) 389 } 390 else 391 return 2; 392 } 393 else if (c == 0x8f) { 394 unsigned char c2, c3; 395 396 REQUIRE_INBUF(3) 397 c2 = IN2; 398 c3 = IN3; 399 /* JIS X 0212 */ 400 TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) { 401 NEXT(3, 1) 402 } 403 else 404 return 3; 405 } 406 else { 407 unsigned char c2; 408 409 REQUIRE_INBUF(2) 410 c2 = IN2; 411 /* JIS X 0208 */ 412 412 #ifndef STRICT_BUILD 413 414 415 416 413 if (c == 0xa1 && c2 == 0xc0) 414 /* FULL-WIDTH REVERSE SOLIDUS */ 415 **outbuf = 0xff3c; 416 else 417 417 #endif 418 419 420 421 422 423 424 425 418 TRYMAP_DEC(jisx0208, **outbuf, 419 c ^ 0x80, c2 ^ 0x80) ; 420 else return 2; 421 NEXT(2, 1) 422 } 423 } 424 425 return 0; 426 426 } 427 427 … … 433 433 ENCODER(shift_jis) 434 434 { 435 436 437 438 435 while (inleft > 0) { 436 Py_UNICODE c = IN1; 437 DBCHAR code; 438 unsigned char c1, c2; 439 439 440 440 #ifdef STRICT_BUILD 441 441 JISX0201_R_ENCODE(c, code) 442 442 #else 443 444 445 443 if (c < 0x80) code = c; 444 else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */ 445 else if (c == 0x203e) code = 0x7e; /* OVERLINE */ 446 446 #endif 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 447 else JISX0201_K_ENCODE(c, code) 448 else UCS4INVALID(c) 449 else code = NOCHAR; 450 451 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { 452 REQUIRE_OUTBUF(1) 453 454 OUT1((unsigned char)code) 455 NEXT(1, 1) 456 continue; 457 } 458 459 REQUIRE_OUTBUF(2) 460 461 if (code == NOCHAR) { 462 TRYMAP_ENC(jisxcommon, code, c); 463 463 #ifndef STRICT_BUILD 464 465 464 else if (c == 0xff3c) 465 code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */ 466 466 #endif 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 467 else 468 return 1; 469 470 if (code & 0x8000) /* MSB set: JIS X 0212 */ 471 return 1; 472 } 473 474 c1 = code >> 8; 475 c2 = code & 0xff; 476 c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); 477 c1 = (c1 - 0x21) >> 1; 478 OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) 479 OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) 480 NEXT(1, 2) 481 } 482 483 return 0; 484 484 } 485 485 486 486 DECODER(shift_jis) 487 487 { 488 489 490 491 488 while (inleft > 0) { 489 unsigned char c = IN1; 490 491 REQUIRE_OUTBUF(1) 492 492 493 493 #ifdef STRICT_BUILD 494 494 JISX0201_R_DECODE(c, **outbuf) 495 495 #else 496 496 if (c < 0x80) **outbuf = c; 497 497 #endif 498 499 500 501 502 503 504 505 506 507 508 509 510 498 else JISX0201_K_DECODE(c, **outbuf) 499 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ 500 unsigned char c1, c2; 501 502 REQUIRE_INBUF(2) 503 c2 = IN2; 504 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) 505 return 2; 506 507 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); 508 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); 509 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21); 510 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; 511 511 512 512 #ifndef STRICT_BUILD 513 514 515 516 517 518 513 if (c1 == 0x21 && c2 == 0x40) { 514 /* FULL-WIDTH REVERSE SOLIDUS */ 515 OUT1(0xff3c) 516 NEXT(2, 1) 517 continue; 518 } 519 519 #endif 520 521 522 523 524 525 526 527 528 529 530 531 532 533 520 TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { 521 NEXT(2, 1) 522 continue; 523 } 524 else 525 return 2; 526 } 527 else 528 return 2; 529 530 NEXT(1, 1) /* JIS X 0201 */ 531 } 532 533 return 0; 534 534 } 535 535 … … 541 541 ENCODER(shift_jis_2004) 542 542 { 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 543 while (inleft > 0) { 544 ucs4_t c = IN1; 545 DBCHAR code = NOCHAR; 546 int c1, c2; 547 Py_ssize_t insize; 548 549 JISX0201_ENCODE(c, code) 550 else DECODE_SURROGATE(c) 551 552 if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { 553 WRITE1((unsigned char)code) 554 NEXT(1, 1) 555 continue; 556 } 557 558 REQUIRE_OUTBUF(2) 559 insize = GET_INSIZE(c); 560 561 if (code == NOCHAR) { 562 if (c <= 0xffff) { 563 EMULATE_JISX0213_2000_ENCODE_BMP(code, c) 564 else TRYMAP_ENC(jisx0213_bmp, code, c) { 565 if (code == MULTIC) { 566 if (inleft < 2) { 567 if (flags & MBENC_FLUSH) { 568 code = find_pairencmap 569 ((ucs2_t)c, 0, 570 jisx0213_pair_encmap, 571 JISX0213_ENCPAIRS); 572 if (code == DBCINV) 573 return 1; 574 } 575 else 576 return MBERR_TOOFEW; 577 } 578 else { 579 code = find_pairencmap( 580 (ucs2_t)c, IN2, 581 jisx0213_pair_encmap, 582 JISX0213_ENCPAIRS); 583 if (code == DBCINV) { 584 code = find_pairencmap( 585 (ucs2_t)c, 0, 586 jisx0213_pair_encmap, 587 JISX0213_ENCPAIRS); 588 if (code == DBCINV) 589 return 1; 590 } 591 else 592 insize = 2; 593 } 594 } 595 } 596 else TRYMAP_ENC(jisxcommon, code, c) { 597 /* abandon JIS X 0212 codes */ 598 if (code & 0x8000) 599 return 1; 600 } 601 else return 1; 602 } 603 else if (c >> 16 == EMPBASE >> 16) { 604 EMULATE_JISX0213_2000_ENCODE_EMP(code, c) 605 else TRYMAP_ENC(jisx0213_emp, code, c&0xffff); 606 else return insize; 607 } 608 else 609 return insize; 610 } 611 612 c1 = code >> 8; 613 c2 = (code & 0xff) - 0x21; 614 615 if (c1 & 0x80) { /* Plane 2 */ 616 if (c1 >= 0xee) c1 -= 0x87; 617 else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49; 618 else c1 -= 0x43; 619 } 620 else /* Plane 1 */ 621 c1 -= 0x21; 622 623 if (c1 & 1) c2 += 0x5e; 624 c1 >>= 1; 625 OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)) 626 OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41)) 627 628 NEXT(insize, 2) 629 } 630 631 return 0; 632 632 } 633 633 634 634 DECODER(shift_jis_2004) 635 635 { 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 636 while (inleft > 0) { 637 unsigned char c = IN1; 638 639 REQUIRE_OUTBUF(1) 640 JISX0201_DECODE(c, **outbuf) 641 else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){ 642 unsigned char c1, c2; 643 ucs4_t code; 644 645 REQUIRE_INBUF(2) 646 c2 = IN2; 647 if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) 648 return 2; 649 650 c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); 651 c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); 652 c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1)); 653 c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; 654 655 if (c1 < 0x5e) { /* Plane 1 */ 656 c1 += 0x21; 657 EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, 658 c1, c2) 659 else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { 660 NEXT_OUT(1) 661 } 662 else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, 663 c1, c2) { 664 NEXT_OUT(1) 665 } 666 else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) { 667 WRITEUCS4(EMPBASE | code) 668 } 669 else TRYMAP_DEC(jisx0213_pair, code, c1, c2) { 670 WRITE2(code >> 16, code & 0xffff) 671 NEXT_OUT(2) 672 } 673 else 674 return 2; 675 NEXT_IN(2) 676 } 677 else { /* Plane 2 */ 678 if (c1 >= 0x67) c1 += 0x07; 679 else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37; 680 else c1 -= 0x3d; 681 682 EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, 683 c1, c2) 684 else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, 685 c1, c2) ; 686 else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) { 687 WRITEUCS4(EMPBASE | code) 688 NEXT_IN(2) 689 continue; 690 } 691 else 692 return 2; 693 NEXT(2, 1) 694 } 695 continue; 696 } 697 else 698 return 2; 699 700 NEXT(1, 1) /* JIS X 0201 */ 701 } 702 703 return 0; 704 704 } 705 705 -
python/vendor/current/Modules/cjkcodecs/_codecs_kr.c
r2 r388 12 12 */ 13 13 14 #define EUCKR_JAMO_FIRSTBYTE 15 #define EUCKR_JAMO_FILLER 14 #define EUCKR_JAMO_FIRSTBYTE 0xA4 15 #define EUCKR_JAMO_FILLER 0xD4 16 16 17 17 static const unsigned char u2cgk_choseong[19] = { 18 19 20 18 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 19 0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 20 0xbc, 0xbd, 0xbe 21 21 }; 22 22 static const unsigned char u2cgk_jungseong[21] = { 23 24 25 23 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 24 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 25 0xcf, 0xd0, 0xd1, 0xd2, 0xd3 26 26 }; 27 27 static const unsigned char u2cgk_jongseong[28] = { 28 29 30 31 28 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 29 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 30 0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba, 31 0xbb, 0xbc, 0xbd, 0xbe 32 32 }; 33 33 34 34 ENCODER(euc_kr) 35 35 { 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 else {/* Mapping is found in CP949 extension,58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 } 86 87 #define NONE 36 while (inleft > 0) { 37 Py_UNICODE c = IN1; 38 DBCHAR code; 39 40 if (c < 0x80) { 41 WRITE1((unsigned char)c) 42 NEXT(1, 1) 43 continue; 44 } 45 UCS4INVALID(c) 46 47 REQUIRE_OUTBUF(2) 48 TRYMAP_ENC(cp949, code, c); 49 else return 1; 50 51 if ((code & 0x8000) == 0) { 52 /* KS X 1001 coded character */ 53 OUT1((code >> 8) | 0x80) 54 OUT2((code & 0xFF) | 0x80) 55 NEXT(1, 2) 56 } 57 else { /* Mapping is found in CP949 extension, 58 * but we encode it in KS X 1001:1998 Annex 3, 59 * make-up sequence for EUC-KR. */ 60 61 REQUIRE_OUTBUF(8) 62 63 /* syllable composition precedence */ 64 OUT1(EUCKR_JAMO_FIRSTBYTE) 65 OUT2(EUCKR_JAMO_FILLER) 66 67 /* All codepoints in CP949 extension are in unicode 68 * Hangul Syllable area. */ 69 assert(0xac00 <= c && c <= 0xd7a3); 70 c -= 0xac00; 71 72 OUT3(EUCKR_JAMO_FIRSTBYTE) 73 OUT4(u2cgk_choseong[c / 588]) 74 NEXT_OUT(4) 75 76 OUT1(EUCKR_JAMO_FIRSTBYTE) 77 OUT2(u2cgk_jungseong[(c / 28) % 21]) 78 OUT3(EUCKR_JAMO_FIRSTBYTE) 79 OUT4(u2cgk_jongseong[c % 28]) 80 NEXT(1, 4) 81 } 82 } 83 84 return 0; 85 } 86 87 #define NONE 127 88 88 89 89 static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */ 90 91 92 93 90 0, 1, NONE, 2, NONE, NONE, 3, 4, 91 5, NONE, NONE, NONE, NONE, NONE, NONE, NONE, 92 6, 7, 8, NONE, 9, 10, 11, 12, 93 13, 14, 15, 16, 17, 18 94 94 }; 95 95 static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */ 96 97 98 99 96 1, 2, 3, 4, 5, 6, 7, NONE, 97 8, 9, 10, 11, 12, 13, 14, 15, 98 16, 17, NONE, 18, 19, 20, 21, 22, 99 NONE, 23, 24, 25, 26, 27 100 100 }; 101 101 102 102 DECODER(euc_kr) 103 103 { 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 104 while (inleft > 0) { 105 unsigned char c = IN1; 106 107 REQUIRE_OUTBUF(1) 108 109 if (c < 0x80) { 110 OUT1(c) 111 NEXT(1, 1) 112 continue; 113 } 114 115 REQUIRE_INBUF(2) 116 117 if (c == EUCKR_JAMO_FIRSTBYTE && 118 IN2 == EUCKR_JAMO_FILLER) { 119 /* KS X 1001:1998 Annex 3 make-up sequence */ 120 DBCHAR cho, jung, jong; 121 122 REQUIRE_INBUF(8) 123 if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || 124 (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || 125 (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) 126 return 8; 127 128 c = (*inbuf)[3]; 129 if (0xa1 <= c && c <= 0xbe) 130 cho = cgk2u_choseong[c - 0xa1]; 131 else 132 cho = NONE; 133 134 c = (*inbuf)[5]; 135 jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE; 136 137 c = (*inbuf)[7]; 138 if (c == EUCKR_JAMO_FILLER) 139 jong = 0; 140 else if (0xa1 <= c && c <= 0xbe) 141 jong = cgk2u_jongseong[c - 0xa1]; 142 else 143 jong = NONE; 144 145 if (cho == NONE || jung == NONE || jong == NONE) 146 return 8; 147 148 OUT1(0xac00 + cho*588 + jung*28 + jong); 149 NEXT(8, 1) 150 } 151 else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) { 152 NEXT(2, 1) 153 } 154 else 155 return 2; 156 } 157 158 return 0; 159 159 } 160 160 #undef NONE … … 167 167 ENCODER(cp949) 168 168 { 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 169 while (inleft > 0) { 170 Py_UNICODE c = IN1; 171 DBCHAR code; 172 173 if (c < 0x80) { 174 WRITE1((unsigned char)c) 175 NEXT(1, 1) 176 continue; 177 } 178 UCS4INVALID(c) 179 180 REQUIRE_OUTBUF(2) 181 TRYMAP_ENC(cp949, code, c); 182 else return 1; 183 184 OUT1((code >> 8) | 0x80) 185 if (code & 0x8000) 186 OUT2(code & 0xFF) /* MSB set: CP949 */ 187 else 188 OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */ 189 NEXT(1, 2) 190 } 191 192 return 0; 193 193 } 194 194 195 195 DECODER(cp949) 196 196 { 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 197 while (inleft > 0) { 198 unsigned char c = IN1; 199 200 REQUIRE_OUTBUF(1) 201 202 if (c < 0x80) { 203 OUT1(c) 204 NEXT(1, 1) 205 continue; 206 } 207 208 REQUIRE_INBUF(2) 209 TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); 210 else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); 211 else return 2; 212 213 NEXT(2, 1) 214 } 215 216 return 0; 217 217 } 218 218 … … 251 251 ENCODER(johab) 252 252 { 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 253 while (inleft > 0) { 254 Py_UNICODE c = IN1; 255 DBCHAR code; 256 257 if (c < 0x80) { 258 WRITE1((unsigned char)c) 259 NEXT(1, 1) 260 continue; 261 } 262 UCS4INVALID(c) 263 264 REQUIRE_OUTBUF(2) 265 266 if (c >= 0xac00 && c <= 0xd7a3) { 267 c -= 0xac00; 268 code = 0x8000 | 269 (u2johabidx_choseong[c / 588] << 10) | 270 (u2johabidx_jungseong[(c / 28) % 21] << 5) | 271 u2johabidx_jongseong[c % 28]; 272 } 273 else if (c >= 0x3131 && c <= 0x3163) 274 code = u2johabjamo[c - 0x3131]; 275 else TRYMAP_ENC(cp949, code, c) { 276 unsigned char c1, c2, t2; 277 unsigned short t1; 278 279 assert((code & 0x8000) == 0); 280 c1 = code >> 8; 281 c2 = code & 0xff; 282 if (((c1 >= 0x21 && c1 <= 0x2c) || 283 (c1 >= 0x4a && c1 <= 0x7d)) && 284 (c2 >= 0x21 && c2 <= 0x7e)) { 285 t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : 286 (c1 - 0x21 + 0x197)); 287 t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21); 288 OUT1(t1 >> 1) 289 OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43) 290 NEXT(1, 2) 291 continue; 292 } 293 else 294 return 1; 295 } 296 else 297 return 1; 298 299 OUT1(code >> 8) 300 OUT2(code & 0xff) 301 NEXT(1, 2) 302 } 303 304 return 0; 305 305 } 306 306 … … 348 348 DECODER(johab) 349 349 { 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 350 while (inleft > 0) { 351 unsigned char c = IN1, c2; 352 353 REQUIRE_OUTBUF(1) 354 355 if (c < 0x80) { 356 OUT1(c) 357 NEXT(1, 1) 358 continue; 359 } 360 361 REQUIRE_INBUF(2) 362 c2 = IN2; 363 364 if (c < 0xd8) { 365 /* johab hangul */ 366 unsigned char c_cho, c_jung, c_jong; 367 unsigned char i_cho, i_jung, i_jong; 368 369 c_cho = (c >> 2) & 0x1f; 370 c_jung = ((c << 3) | c2 >> 5) & 0x1f; 371 c_jong = c2 & 0x1f; 372 373 i_cho = johabidx_choseong[c_cho]; 374 i_jung = johabidx_jungseong[c_jung]; 375 i_jong = johabidx_jongseong[c_jong]; 376 377 if (i_cho == NONE || i_jung == NONE || i_jong == NONE) 378 return 2; 379 380 /* we don't use U+1100 hangul jamo yet. */ 381 if (i_cho == FILL) { 382 if (i_jung == FILL) { 383 if (i_jong == FILL) 384 OUT1(0x3000) 385 else 386 OUT1(0x3100 | 387 johabjamo_jongseong[c_jong]) 388 } 389 else { 390 if (i_jong == FILL) 391 OUT1(0x3100 | 392 johabjamo_jungseong[c_jung]) 393 else 394 return 2; 395 } 396 } else { 397 if (i_jung == FILL) { 398 if (i_jong == FILL) 399 OUT1(0x3100 | 400 johabjamo_choseong[c_cho]) 401 else 402 return 2; 403 } 404 else 405 OUT1(0xac00 + 406 i_cho * 588 + 407 i_jung * 28 + 408 (i_jong == FILL ? 0 : i_jong)) 409 } 410 NEXT(2, 1) 411 } else { 412 /* KS X 1001 except hangul jamos and syllables */ 413 if (c == 0xdf || c > 0xf9 || 414 c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) || 415 (c2 & 0x7f) == 0x7f || 416 (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3))) 417 return 2; 418 else { 419 unsigned char t1, t2; 420 421 t1 = (c < 0xe0 ? 2 * (c - 0xd9) : 422 2 * c - 0x197); 423 t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43); 424 t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21; 425 t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; 426 427 TRYMAP_DEC(ksx1001, **outbuf, t1, t2); 428 else return 2; 429 NEXT(2, 1) 430 } 431 } 432 } 433 434 return 0; 435 435 } 436 436 #undef NONE -
python/vendor/current/Modules/cjkcodecs/_codecs_tw.c
r2 r388 14 14 ENCODER(big5) 15 15 { 16 17 18 16 while (inleft > 0) { 17 Py_UNICODE c = **inbuf; 18 DBCHAR code; 19 19 20 21 22 23 24 25 26 20 if (c < 0x80) { 21 REQUIRE_OUTBUF(1) 22 **outbuf = (unsigned char)c; 23 NEXT(1, 1) 24 continue; 25 } 26 UCS4INVALID(c) 27 27 28 28 REQUIRE_OUTBUF(2) 29 29 30 31 30 TRYMAP_ENC(big5, code, c); 31 else return 1; 32 32 33 34 35 36 33 OUT1(code >> 8) 34 OUT2(code & 0xFF) 35 NEXT(1, 2) 36 } 37 37 38 38 return 0; 39 39 } 40 40 41 41 DECODER(big5) 42 42 { 43 44 43 while (inleft > 0) { 44 unsigned char c = IN1; 45 45 46 46 REQUIRE_OUTBUF(1) 47 47 48 49 50 51 52 48 if (c < 0x80) { 49 OUT1(c) 50 NEXT(1, 1) 51 continue; 52 } 53 53 54 55 56 57 58 59 54 REQUIRE_INBUF(2) 55 TRYMAP_DEC(big5, **outbuf, c, IN2) { 56 NEXT(2, 1) 57 } 58 else return 2; 59 } 60 60 61 61 return 0; 62 62 } 63 63 … … 69 69 ENCODER(cp950) 70 70 { 71 72 73 71 while (inleft > 0) { 72 Py_UNICODE c = IN1; 73 DBCHAR code; 74 74 75 76 77 78 79 80 75 if (c < 0x80) { 76 WRITE1((unsigned char)c) 77 NEXT(1, 1) 78 continue; 79 } 80 UCS4INVALID(c) 81 81 82 83 84 85 82 REQUIRE_OUTBUF(2) 83 TRYMAP_ENC(cp950ext, code, c); 84 else TRYMAP_ENC(big5, code, c); 85 else return 1; 86 86 87 88 89 90 87 OUT1(code >> 8) 88 OUT2(code & 0xFF) 89 NEXT(1, 2) 90 } 91 91 92 92 return 0; 93 93 } 94 94 95 95 DECODER(cp950) 96 96 { 97 98 97 while (inleft > 0) { 98 unsigned char c = IN1; 99 99 100 100 REQUIRE_OUTBUF(1) 101 101 102 103 104 105 106 102 if (c < 0x80) { 103 OUT1(c) 104 NEXT(1, 1) 105 continue; 106 } 107 107 108 108 REQUIRE_INBUF(2) 109 109 110 111 112 110 TRYMAP_DEC(cp950ext, **outbuf, c, IN2); 111 else TRYMAP_DEC(big5, **outbuf, c, IN2); 112 else return 2; 113 113 114 115 114 NEXT(2, 1) 115 } 116 116 117 117 return 0; 118 118 } 119 119 -
python/vendor/current/Modules/cjkcodecs/alg_jisx0201.h
r2 r388 1 #define JISX0201_R_ENCODE(c, assi) 2 if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e)\3 (assi) = (c);\4 else if ((c) == 0x00a5) (assi) = 0x5c;\5 6 #define JISX0201_K_ENCODE(c, assi) 7 if ((c) >= 0xff61 && (c) <= 0xff9f)\8 9 #define JISX0201_ENCODE(c, assi) 10 JISX0201_R_ENCODE(c, assi)\11 1 #define JISX0201_R_ENCODE(c, assi) \ 2 if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \ 3 (assi) = (c); \ 4 else if ((c) == 0x00a5) (assi) = 0x5c; \ 5 else if ((c) == 0x203e) (assi) = 0x7e; 6 #define JISX0201_K_ENCODE(c, assi) \ 7 if ((c) >= 0xff61 && (c) <= 0xff9f) \ 8 (assi) = (c) - 0xfec0; 9 #define JISX0201_ENCODE(c, assi) \ 10 JISX0201_R_ENCODE(c, assi) \ 11 else JISX0201_K_ENCODE(c, assi) 12 12 13 #define JISX0201_R_DECODE(c, assi) 14 if ((c) < 0x5c) (assi) = (c);\15 else if ((c) == 0x5c) (assi) = 0x00a5;\16 else if ((c) < 0x7e) (assi) = (c);\17 else if ((c) == 0x7e) (assi) = 0x203e;\18 19 #define JISX0201_K_DECODE(c, assi) 20 if ((c) >= 0xa1 && (c) <= 0xdf)\21 22 #define JISX0201_DECODE(c, assi) 23 JISX0201_R_DECODE(c, assi)\24 13 #define JISX0201_R_DECODE(c, assi) \ 14 if ((c) < 0x5c) (assi) = (c); \ 15 else if ((c) == 0x5c) (assi) = 0x00a5; \ 16 else if ((c) < 0x7e) (assi) = (c); \ 17 else if ((c) == 0x7e) (assi) = 0x203e; \ 18 else if ((c) == 0x7f) (assi) = 0x7f; 19 #define JISX0201_K_DECODE(c, assi) \ 20 if ((c) >= 0xa1 && (c) <= 0xdf) \ 21 (assi) = 0xfec0 + (c); 22 #define JISX0201_DECODE(c, assi) \ 23 JISX0201_R_DECODE(c, assi) \ 24 else JISX0201_K_DECODE(c, assi) -
python/vendor/current/Modules/cjkcodecs/cjkcodecs.h
r2 r388 14 14 15 15 /* a unicode "undefined" codepoint */ 16 #define UNIINV 16 #define UNIINV 0xFFFE 17 17 18 18 /* internal-use DBCS codepoints which aren't used by any charsets */ 19 #define NOCHAR 20 #define MULTIC 21 #define DBCINV 19 #define NOCHAR 0xFFFF 20 #define MULTIC 0xFFFE 21 #define DBCINV 0xFFFD 22 22 23 23 /* shorter macros to save source size of mapping tables */ … … 28 28 29 29 struct dbcs_index { 30 31 30 const ucs2_t *map; 31 unsigned char bottom, top; 32 32 }; 33 33 typedef struct dbcs_index decode_map; 34 34 35 35 struct widedbcs_index { 36 37 36 const ucs4_t *map; 37 unsigned char bottom, top; 38 38 }; 39 39 typedef struct widedbcs_index widedecode_map; 40 40 41 41 struct unim_index { 42 43 42 const DBCHAR *map; 43 unsigned char bottom, top; 44 44 }; 45 45 typedef struct unim_index encode_map; 46 46 47 47 struct unim_index_bytebased { 48 49 48 const unsigned char *map; 49 unsigned char bottom, top; 50 50 }; 51 51 52 52 struct dbcs_map { 53 54 55 53 const char *charset; 54 const struct unim_index *encmap; 55 const struct dbcs_index *decmap; 56 56 }; 57 57 58 58 struct pair_encodemap { 59 60 59 ucs4_t uniseq; 60 DBCHAR code; 61 61 }; 62 62 … … 64 64 static const struct dbcs_map *mapping_list; 65 65 66 #define CODEC_INIT(encoding) 67 68 69 #define ENCODER_INIT(encoding) 70 static int encoding##_encode_init(\71 72 #define ENCODER(encoding) 73 static Py_ssize_t encoding##_encode(\74 MultibyteCodec_State *state, const void *config,\75 const Py_UNICODE **inbuf, Py_ssize_t inleft,\76 77 #define ENCODER_RESET(encoding) 78 static Py_ssize_t encoding##_encode_reset(\79 MultibyteCodec_State *state, const void *config,\80 81 82 #define DECODER_INIT(encoding) 83 static int encoding##_decode_init(\84 85 #define DECODER(encoding) 86 static Py_ssize_t encoding##_decode(\87 MultibyteCodec_State *state, const void *config,\88 const unsigned char **inbuf, Py_ssize_t inleft,\89 90 #define DECODER_RESET(encoding) 91 static Py_ssize_t encoding##_decode_reset(\92 66 #define CODEC_INIT(encoding) \ 67 static int encoding##_codec_init(const void *config) 68 69 #define ENCODER_INIT(encoding) \ 70 static int encoding##_encode_init( \ 71 MultibyteCodec_State *state, const void *config) 72 #define ENCODER(encoding) \ 73 static Py_ssize_t encoding##_encode( \ 74 MultibyteCodec_State *state, const void *config, \ 75 const Py_UNICODE **inbuf, Py_ssize_t inleft, \ 76 unsigned char **outbuf, Py_ssize_t outleft, int flags) 77 #define ENCODER_RESET(encoding) \ 78 static Py_ssize_t encoding##_encode_reset( \ 79 MultibyteCodec_State *state, const void *config, \ 80 unsigned char **outbuf, Py_ssize_t outleft) 81 82 #define DECODER_INIT(encoding) \ 83 static int encoding##_decode_init( \ 84 MultibyteCodec_State *state, const void *config) 85 #define DECODER(encoding) \ 86 static Py_ssize_t encoding##_decode( \ 87 MultibyteCodec_State *state, const void *config, \ 88 const unsigned char **inbuf, Py_ssize_t inleft, \ 89 Py_UNICODE **outbuf, Py_ssize_t outleft) 90 #define DECODER_RESET(encoding) \ 91 static Py_ssize_t encoding##_decode_reset( \ 92 MultibyteCodec_State *state, const void *config) 93 93 94 94 #if Py_UNICODE_SIZE == 4 95 #define UCS4INVALID(code) 96 if ((code) > 0xFFFF)\97 95 #define UCS4INVALID(code) \ 96 if ((code) > 0xFFFF) \ 97 return 1; 98 98 #else 99 #define UCS4INVALID(code) 100 101 #endif 102 103 #define NEXT_IN(i) 104 (*inbuf) += (i);\105 106 #define NEXT_OUT(o) 107 (*outbuf) += (o);\108 109 #define NEXT(i, o) 110 111 112 #define REQUIRE_INBUF(n) 113 if (inleft < (n))\114 115 #define REQUIRE_OUTBUF(n) 116 if (outleft < (n))\117 99 #define UCS4INVALID(code) \ 100 if (0) ; 101 #endif 102 103 #define NEXT_IN(i) \ 104 (*inbuf) += (i); \ 105 (inleft) -= (i); 106 #define NEXT_OUT(o) \ 107 (*outbuf) += (o); \ 108 (outleft) -= (o); 109 #define NEXT(i, o) \ 110 NEXT_IN(i) NEXT_OUT(o) 111 112 #define REQUIRE_INBUF(n) \ 113 if (inleft < (n)) \ 114 return MBERR_TOOFEW; 115 #define REQUIRE_OUTBUF(n) \ 116 if (outleft < (n)) \ 117 return MBERR_TOOSMALL; 118 118 119 119 #define IN1 ((*inbuf)[0]) … … 127 127 #define OUT4(c) ((*outbuf)[3]) = (c); 128 128 129 #define WRITE1(c1) 130 REQUIRE_OUTBUF(1)\131 132 #define WRITE2(c1, c2) 133 REQUIRE_OUTBUF(2)\134 (*outbuf)[0] = (c1);\135 136 #define WRITE3(c1, c2, c3) 137 REQUIRE_OUTBUF(3)\138 (*outbuf)[0] = (c1);\139 (*outbuf)[1] = (c2);\140 141 #define WRITE4(c1, c2, c3, c4) 142 REQUIRE_OUTBUF(4)\143 (*outbuf)[0] = (c1);\144 (*outbuf)[1] = (c2);\145 (*outbuf)[2] = (c3);\146 129 #define WRITE1(c1) \ 130 REQUIRE_OUTBUF(1) \ 131 (*outbuf)[0] = (c1); 132 #define WRITE2(c1, c2) \ 133 REQUIRE_OUTBUF(2) \ 134 (*outbuf)[0] = (c1); \ 135 (*outbuf)[1] = (c2); 136 #define WRITE3(c1, c2, c3) \ 137 REQUIRE_OUTBUF(3) \ 138 (*outbuf)[0] = (c1); \ 139 (*outbuf)[1] = (c2); \ 140 (*outbuf)[2] = (c3); 141 #define WRITE4(c1, c2, c3, c4) \ 142 REQUIRE_OUTBUF(4) \ 143 (*outbuf)[0] = (c1); \ 144 (*outbuf)[1] = (c2); \ 145 (*outbuf)[2] = (c3); \ 146 (*outbuf)[3] = (c4); 147 147 148 148 #if Py_UNICODE_SIZE == 2 149 # define WRITEUCS4(c) 150 REQUIRE_OUTBUF(2)\151 (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10);\152 (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff);\153 149 # define WRITEUCS4(c) \ 150 REQUIRE_OUTBUF(2) \ 151 (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ 152 (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ 153 NEXT_OUT(2) 154 154 #else 155 # define WRITEUCS4(c) 156 REQUIRE_OUTBUF(1)\157 **outbuf = (Py_UNICODE)(c);\158 159 #endif 160 161 #define _TRYMAP_ENC(m, assi, val) 162 ((m)->map != NULL && (val) >= (m)->bottom &&\163 (val)<= (m)->top && ((assi) = (m)->map[(val) -\164 165 #define TRYMAP_ENC_COND(charset, assi, uni) 166 167 #define TRYMAP_ENC(charset, assi, uni) 168 169 170 #define _TRYMAP_DEC(m, assi, val) 171 ((m)->map != NULL && (val) >= (m)->bottom &&\172 (val)<= (m)->top && ((assi) = (m)->map[(val) -\173 174 #define TRYMAP_DEC(charset, assi, c1, c2) 175 176 177 #define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) 178 ((m)->map != NULL && (val) >= (m)->bottom &&\179 (val)<= (m)->top &&\180 181 182 183 #define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) 184 185 186 #define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) 187 155 # define WRITEUCS4(c) \ 156 REQUIRE_OUTBUF(1) \ 157 **outbuf = (Py_UNICODE)(c); \ 158 NEXT_OUT(1) 159 #endif 160 161 #define _TRYMAP_ENC(m, assi, val) \ 162 ((m)->map != NULL && (val) >= (m)->bottom && \ 163 (val)<= (m)->top && ((assi) = (m)->map[(val) - \ 164 (m)->bottom]) != NOCHAR) 165 #define TRYMAP_ENC_COND(charset, assi, uni) \ 166 _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff) 167 #define TRYMAP_ENC(charset, assi, uni) \ 168 if TRYMAP_ENC_COND(charset, assi, uni) 169 170 #define _TRYMAP_DEC(m, assi, val) \ 171 ((m)->map != NULL && (val) >= (m)->bottom && \ 172 (val)<= (m)->top && ((assi) = (m)->map[(val) - \ 173 (m)->bottom]) != UNIINV) 174 #define TRYMAP_DEC(charset, assi, c1, c2) \ 175 if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) 176 177 #define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \ 178 ((m)->map != NULL && (val) >= (m)->bottom && \ 179 (val)<= (m)->top && \ 180 ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \ 181 (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \ 182 (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1)) 183 #define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \ 184 if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \ 185 assplane, asshi, asslo, (uni) & 0xff) 186 #define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \ 187 if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2) 188 188 189 189 #if Py_UNICODE_SIZE == 2 190 #define DECODE_SURROGATE(c) 191 if (c >> 10 == 0xd800 >> 10) { /* high surrogate */\192 REQUIRE_INBUF(2)\193 194 195 ((ucs4_t)(IN2) - 0xdc00);\196 }\197 198 #define GET_INSIZE(c) 190 #define DECODE_SURROGATE(c) \ 191 if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \ 192 REQUIRE_INBUF(2) \ 193 if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \ 194 c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \ 195 ((ucs4_t)(IN2) - 0xdc00); \ 196 } \ 197 } 198 #define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1) 199 199 #else 200 200 #define DECODE_SURROGATE(c) {;} 201 #define GET_INSIZE(c) 201 #define GET_INSIZE(c) 1 202 202 #endif 203 203 … … 206 206 #define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap}, 207 207 #define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap}, 208 #define END_MAPPINGS_LIST 209 {"", NULL, NULL} };\210 static const struct dbcs_map *mapping_list =\211 208 #define END_MAPPINGS_LIST \ 209 {"", NULL, NULL} }; \ 210 static const struct dbcs_map *mapping_list = \ 211 (const struct dbcs_map *)_mapping_list; 212 212 213 213 #define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = { 214 #define _STATEFUL_METHODS(enc) 215 enc##_encode,\216 enc##_encode_init,\217 enc##_encode_reset,\218 enc##_decode,\219 enc##_decode_init,\220 221 #define _STATELESS_METHODS(enc) 222 enc##_encode, NULL, NULL,\223 224 #define CODEC_STATEFUL(enc) { 225 #enc, NULL, NULL,\226 _STATEFUL_METHODS(enc)\214 #define _STATEFUL_METHODS(enc) \ 215 enc##_encode, \ 216 enc##_encode_init, \ 217 enc##_encode_reset, \ 218 enc##_decode, \ 219 enc##_decode_init, \ 220 enc##_decode_reset, 221 #define _STATELESS_METHODS(enc) \ 222 enc##_encode, NULL, NULL, \ 223 enc##_decode, NULL, NULL, 224 #define CODEC_STATEFUL(enc) { \ 225 #enc, NULL, NULL, \ 226 _STATEFUL_METHODS(enc) \ 227 227 }, 228 #define CODEC_STATELESS(enc) { 229 #enc, NULL, NULL,\230 _STATELESS_METHODS(enc)\228 #define CODEC_STATELESS(enc) { \ 229 #enc, NULL, NULL, \ 230 _STATELESS_METHODS(enc) \ 231 231 }, 232 #define CODEC_STATELESS_WINIT(enc) { 233 #enc, NULL,\234 enc##_codec_init,\235 _STATELESS_METHODS(enc)\232 #define CODEC_STATELESS_WINIT(enc) { \ 233 #enc, NULL, \ 234 enc##_codec_init, \ 235 _STATELESS_METHODS(enc) \ 236 236 }, 237 #define END_CODECS_LIST 238 {"", NULL,} };\239 static const MultibyteCodec *codec_list =\240 237 #define END_CODECS_LIST \ 238 {"", NULL,} }; \ 239 static const MultibyteCodec *codec_list = \ 240 (const MultibyteCodec *)_codec_list; 241 241 242 242 static PyObject * 243 243 getmultibytecodec(void) 244 244 { 245 246 247 248 249 250 251 252 253 254 245 static PyObject *cofunc = NULL; 246 247 if (cofunc == NULL) { 248 PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); 249 if (mod == NULL) 250 return NULL; 251 cofunc = PyObject_GetAttrString(mod, "__create_codec"); 252 Py_DECREF(mod); 253 } 254 return cofunc; 255 255 } 256 256 … … 258 258 getcodec(PyObject *self, PyObject *encoding) 259 259 { 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 codecobj = PyCObject_FromVoidPtr((void *)codec, NULL);286 287 288 289 290 291 292 260 PyObject *codecobj, *r, *cofunc; 261 const MultibyteCodec *codec; 262 const char *enc; 263 264 if (!PyString_Check(encoding)) { 265 PyErr_SetString(PyExc_TypeError, 266 "encoding name must be a string."); 267 return NULL; 268 } 269 270 cofunc = getmultibytecodec(); 271 if (cofunc == NULL) 272 return NULL; 273 274 enc = PyString_AS_STRING(encoding); 275 for (codec = codec_list; codec->encoding[0]; codec++) 276 if (strcmp(codec->encoding, enc) == 0) 277 break; 278 279 if (codec->encoding[0] == '\0') { 280 PyErr_SetString(PyExc_LookupError, 281 "no such codec is supported."); 282 return NULL; 283 } 284 285 codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL); 286 if (codecobj == NULL) 287 return NULL; 288 289 r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL); 290 Py_DECREF(codecobj); 291 292 return r; 293 293 } 294 294 295 295 static struct PyMethodDef __methods[] = { 296 297 296 {"getcodec", (PyCFunction)getcodec, METH_O, ""}, 297 {NULL, NULL}, 298 298 }; 299 299 … … 301 301 register_maps(PyObject *module) 302 302 { 303 304 305 306 307 308 309 310 PyCObject_FromVoidPtr((void *)h, NULL));311 312 313 314 303 const struct dbcs_map *h; 304 305 for (h = mapping_list; h->charset[0] != '\0'; h++) { 306 char mhname[256] = "__map_"; 307 int r; 308 strcpy(mhname + sizeof("__map_") - 1, h->charset); 309 r = PyModule_AddObject(module, mhname, 310 PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL)); 311 if (r == -1) 312 return -1; 313 } 314 return 0; 315 315 } 316 316 … … 318 318 static DBCHAR 319 319 find_pairencmap(ucs2_t body, ucs2_t modifier, 320 321 { 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 320 const struct pair_encodemap *haystack, int haystacksize) 321 { 322 int pos, min, max; 323 ucs4_t value = body << 16 | modifier; 324 325 min = 0; 326 max = haystacksize; 327 328 for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) 329 if (value < haystack[pos].uniseq) { 330 if (max == pos) break; 331 else max = pos; 332 } 333 else if (value > haystack[pos].uniseq) { 334 if (min == pos) break; 335 else min = pos; 336 } 337 else 338 break; 339 340 if (value == haystack[pos].uniseq) 341 return haystack[pos].code; 342 else 343 return DBCINV; 344 344 } 345 345 #endif … … 347 347 #ifdef USING_IMPORTED_MAPS 348 348 #define IMPORT_MAP(locale, charset, encmap, decmap) \ 349 350 349 importmap("_codecs_" #locale, "__map_" #charset, \ 350 (const void**)encmap, (const void**)decmap) 351 351 352 352 static int 353 353 importmap(const char *modname, const char *symbol, 354 355 { 356 357 358 359 360 361 362 363 364 365 else if (!PyCObject_Check(o)) {366 367 "map data must be a CObject.");368 369 370 371 372 map = PyCObject_AsVoidPtr(o);373 374 375 376 377 378 379 380 381 354 const void **encmap, const void **decmap) 355 { 356 PyObject *o, *mod; 357 358 mod = PyImport_ImportModule((char *)modname); 359 if (mod == NULL) 360 return -1; 361 362 o = PyObject_GetAttrString(mod, (char*)symbol); 363 if (o == NULL) 364 goto errorexit; 365 else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) { 366 PyErr_SetString(PyExc_ValueError, 367 "map data must be a Capsule."); 368 goto errorexit; 369 } 370 else { 371 struct dbcs_map *map; 372 map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME); 373 if (encmap != NULL) 374 *encmap = map->encmap; 375 if (decmap != NULL) 376 *decmap = map->decmap; 377 Py_DECREF(o); 378 } 379 380 Py_DECREF(mod); 381 return 0; 382 382 383 383 errorexit: 384 385 386 } 387 #endif 388 389 #define I_AM_A_MODULE_FOR(loc) 390 void\391 init_codecs_##loc(void)\392 {\393 394 if (m != NULL)\395 (void)register_maps(m);\396 397 398 #endif 384 Py_DECREF(mod); 385 return -1; 386 } 387 #endif 388 389 #define I_AM_A_MODULE_FOR(loc) \ 390 void \ 391 init_codecs_##loc(void) \ 392 { \ 393 PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\ 394 if (m != NULL) \ 395 (void)register_maps(m); \ 396 } 397 398 #endif -
python/vendor/current/Modules/cjkcodecs/emu_jisx0213_2000.h
r2 r388 6 6 #endif 7 7 8 #define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) 9 if (config == (void *)2000 && (\10 (c) == 0x9B1C || (c) == 0x4FF1 ||\11 (c) == 0x525D || (c) == 0x541E ||\12 (c) == 0x5653 || (c) == 0x59F8 ||\13 (c) == 0x5C5B || (c) == 0x5E77 ||\14 (c) == 0x7626 || (c) == 0x7E6B))\15 return EMULATE_JISX0213_2000_ENCODE_INVALID;\16 else if (config == (void *)2000 && (c) == 0x9B1D)\17 (assi) = 0x8000 | 0x7d3b;\8 #define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \ 9 if (config == (void *)2000 && ( \ 10 (c) == 0x9B1C || (c) == 0x4FF1 || \ 11 (c) == 0x525D || (c) == 0x541E || \ 12 (c) == 0x5653 || (c) == 0x59F8 || \ 13 (c) == 0x5C5B || (c) == 0x5E77 || \ 14 (c) == 0x7626 || (c) == 0x7E6B)) \ 15 return EMULATE_JISX0213_2000_ENCODE_INVALID; \ 16 else if (config == (void *)2000 && (c) == 0x9B1D) \ 17 (assi) = 0x8000 | 0x7d3b; \ 18 18 19 #define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) 20 if (config == (void *)2000 && (c) == 0x20B9F)\21 19 #define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \ 20 if (config == (void *)2000 && (c) == 0x20B9F) \ 21 return EMULATE_JISX0213_2000_ENCODE_INVALID; 22 22 23 23 #ifndef EMULATE_JISX0213_2000_DECODE_INVALID … … 25 25 #endif 26 26 27 #define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) 28 if (config == (void *)2000 &&\29 (((c1) == 0x2E && (c2) == 0x21) ||\30 ((c1) == 0x2F && (c2) == 0x7E) ||\31 ((c1) == 0x4F && (c2) == 0x54) ||\32 ((c1) == 0x4F && (c2) == 0x7E) ||\33 ((c1) == 0x74 && (c2) == 0x27) ||\34 ((c1) == 0x7E && (c2) == 0x7A) ||\35 ((c1) == 0x7E && (c2) == 0x7B) ||\36 ((c1) == 0x7E && (c2) == 0x7C) ||\37 ((c1) == 0x7E && (c2) == 0x7D) ||\38 ((c1) == 0x7E && (c2) == 0x7E)))\39 27 #define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \ 28 if (config == (void *)2000 && \ 29 (((c1) == 0x2E && (c2) == 0x21) || \ 30 ((c1) == 0x2F && (c2) == 0x7E) || \ 31 ((c1) == 0x4F && (c2) == 0x54) || \ 32 ((c1) == 0x4F && (c2) == 0x7E) || \ 33 ((c1) == 0x74 && (c2) == 0x27) || \ 34 ((c1) == 0x7E && (c2) == 0x7A) || \ 35 ((c1) == 0x7E && (c2) == 0x7B) || \ 36 ((c1) == 0x7E && (c2) == 0x7C) || \ 37 ((c1) == 0x7E && (c2) == 0x7D) || \ 38 ((c1) == 0x7E && (c2) == 0x7E))) \ 39 return EMULATE_JISX0213_2000_DECODE_INVALID; 40 40 41 #define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) 42 if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B)\43 41 #define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \ 42 if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \ 43 (assi) = 0x9B1D; -
python/vendor/current/Modules/cjkcodecs/multibytecodec.c
r2 r388 46 46 47 47 static PyObject *multibytecodec_encode(MultibyteCodec *, 48 49 50 51 #define MBENC_RESET 48 MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, 49 PyObject *, int); 50 51 #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ 52 52 53 53 static PyObject * 54 54 make_tuple(PyObject *object, Py_ssize_t len) 55 55 { 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 56 PyObject *v, *w; 57 58 if (object == NULL) 59 return NULL; 60 61 v = PyTuple_New(2); 62 if (v == NULL) { 63 Py_DECREF(object); 64 return NULL; 65 } 66 PyTuple_SET_ITEM(v, 0, object); 67 68 w = PyInt_FromSsize_t(len); 69 if (w == NULL) { 70 Py_DECREF(v); 71 return NULL; 72 } 73 PyTuple_SET_ITEM(v, 1, w); 74 75 return v; 76 76 } 77 77 … … 79 79 internal_error_callback(const char *errors) 80 80 { 81 82 83 84 85 86 87 88 81 if (errors == NULL || strcmp(errors, "strict") == 0) 82 return ERROR_STRICT; 83 else if (strcmp(errors, "ignore") == 0) 84 return ERROR_IGNORE; 85 else if (strcmp(errors, "replace") == 0) 86 return ERROR_REPLACE; 87 else 88 return PyString_FromString(errors); 89 89 } 90 90 … … 92 92 call_error_callback(PyObject *errors, PyObject *exc) 93 93 { 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 94 PyObject *args, *cb, *r; 95 96 assert(PyString_Check(errors)); 97 cb = PyCodec_LookupError(PyString_AS_STRING(errors)); 98 if (cb == NULL) 99 return NULL; 100 101 args = PyTuple_New(1); 102 if (args == NULL) { 103 Py_DECREF(cb); 104 return NULL; 105 } 106 107 PyTuple_SET_ITEM(args, 0, exc); 108 Py_INCREF(exc); 109 110 r = PyObject_CallObject(cb, args); 111 Py_DECREF(args); 112 Py_DECREF(cb); 113 return r; 114 114 } 115 115 … … 117 117 codecctx_errors_get(MultibyteStatefulCodecContext *self) 118 118 { 119 120 121 122 123 124 125 126 127 128 129 130 131 132 119 const char *errors; 120 121 if (self->errors == ERROR_STRICT) 122 errors = "strict"; 123 else if (self->errors == ERROR_IGNORE) 124 errors = "ignore"; 125 else if (self->errors == ERROR_REPLACE) 126 errors = "replace"; 127 else { 128 Py_INCREF(self->errors); 129 return self->errors; 130 } 131 132 return PyString_FromString(errors); 133 133 } 134 134 135 135 static int 136 136 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value, 137 138 { 139 140 141 142 143 144 145 146 147 148 149 150 151 152 137 void *closure) 138 { 139 PyObject *cb; 140 141 if (!PyString_Check(value)) { 142 PyErr_SetString(PyExc_TypeError, "errors must be a string"); 143 return -1; 144 } 145 146 cb = internal_error_callback(PyString_AS_STRING(value)); 147 if (cb == NULL) 148 return -1; 149 150 ERROR_DECREF(self->errors); 151 self->errors = cb; 152 return 0; 153 153 } 154 154 155 155 /* This getset handlers list is used by all the stateful codec objects */ 156 156 static PyGetSetDef codecctx_getsets[] = { 157 {"errors",(getter)codecctx_errors_get,158 159 160 157 {"errors", (getter)codecctx_errors_get, 158 (setter)codecctx_errors_set, 159 PyDoc_STR("how to treat errors")}, 160 {NULL,} 161 161 }; 162 162 … … 164 164 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) 165 165 { 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 } 185 #define REQUIRE_ENCODEBUFFER(buf, s) { 186 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)\187 if (expand_encodebuffer(buf, s) == -1)\188 goto errorexit;\166 Py_ssize_t orgpos, orgsize, incsize; 167 168 orgpos = (Py_ssize_t)((char *)buf->outbuf - 169 PyString_AS_STRING(buf->outobj)); 170 orgsize = PyString_GET_SIZE(buf->outobj); 171 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); 172 173 if (orgsize > PY_SSIZE_T_MAX - incsize) 174 return -1; 175 176 if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1) 177 return -1; 178 179 buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos; 180 buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj) 181 + PyString_GET_SIZE(buf->outobj); 182 183 return 0; 184 } 185 #define REQUIRE_ENCODEBUFFER(buf, s) { \ 186 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ 187 if (expand_encodebuffer(buf, s) == -1) \ 188 goto errorexit; \ 189 189 } 190 190 … … 192 192 expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) 193 193 { 194 195 196 197 198 199 200 201 202 203 204 205 206 207 } 208 #define REQUIRE_DECODEBUFFER(buf, s) { 209 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)\210 if (expand_decodebuffer(buf, s) == -1)\211 goto errorexit;\194 Py_ssize_t orgpos, orgsize; 195 196 orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); 197 orgsize = PyUnicode_GET_SIZE(buf->outobj); 198 if (PyUnicode_Resize(&buf->outobj, orgsize + ( 199 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) 200 return -1; 201 202 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; 203 buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) 204 + PyUnicode_GET_SIZE(buf->outobj); 205 206 return 0; 207 } 208 #define REQUIRE_DECODEBUFFER(buf, s) { \ 209 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ 210 if (expand_decodebuffer(buf, s) == -1) \ 211 goto errorexit; \ 212 212 } 213 213 … … 219 219 static int 220 220 multibytecodec_encerror(MultibyteCodec *codec, 221 222 223 224 { 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 221 MultibyteCodec_State *state, 222 MultibyteEncodeBuffer *buf, 223 PyObject *errors, Py_ssize_t e) 224 { 225 PyObject *retobj = NULL, *retstr = NULL, *tobj; 226 Py_ssize_t retstrsize, newpos; 227 Py_ssize_t esize, start, end; 228 const char *reason; 229 230 if (e > 0) { 231 reason = "illegal multibyte sequence"; 232 esize = e; 233 } 234 else { 235 switch (e) { 236 case MBERR_TOOSMALL: 237 REQUIRE_ENCODEBUFFER(buf, -1); 238 return 0; /* retry it */ 239 case MBERR_TOOFEW: 240 reason = "incomplete multibyte sequence"; 241 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 242 break; 243 case MBERR_INTERNAL: 244 PyErr_SetString(PyExc_RuntimeError, 245 "internal codec error"); 246 return -1; 247 default: 248 PyErr_SetString(PyExc_RuntimeError, 249 "unknown runtime error"); 250 return -1; 251 } 252 } 253 254 if (errors == ERROR_REPLACE) { 255 const Py_UNICODE replchar = '?', *inbuf = &replchar; 256 Py_ssize_t r; 257 258 for (;;) { 259 Py_ssize_t outleft; 260 261 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); 262 r = codec->encode(state, codec->config, &inbuf, 1, 263 &buf->outbuf, outleft, 0); 264 if (r == MBERR_TOOSMALL) { 265 REQUIRE_ENCODEBUFFER(buf, -1); 266 continue; 267 } 268 else 269 break; 270 } 271 272 if (r != 0) { 273 REQUIRE_ENCODEBUFFER(buf, 1); 274 *buf->outbuf++ = '?'; 275 } 276 } 277 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { 278 buf->inbuf += esize; 279 return 0; 280 } 281 282 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); 283 end = start + esize; 284 285 /* use cached exception object if available */ 286 if (buf->excobj == NULL) { 287 buf->excobj = PyUnicodeEncodeError_Create(codec->encoding, 288 buf->inbuf_top, 289 buf->inbuf_end - buf->inbuf_top, 290 start, end, reason); 291 if (buf->excobj == NULL) 292 goto errorexit; 293 } 294 else 295 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || 296 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || 297 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) 298 goto errorexit; 299 300 if (errors == ERROR_STRICT) { 301 PyCodec_StrictErrors(buf->excobj); 302 goto errorexit; 303 } 304 305 retobj = call_error_callback(errors, buf->excobj); 306 if (retobj == NULL) 307 goto errorexit; 308 309 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || 310 !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || 311 !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) || 312 PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) { 313 PyErr_SetString(PyExc_TypeError, 314 "encoding error handler must return " 315 "(unicode, int) tuple"); 316 goto errorexit; 317 } 318 319 { 320 const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); 321 322 retstr = multibytecodec_encode(codec, state, &uraw, 323 PyUnicode_GET_SIZE(tobj), ERROR_STRICT, 324 MBENC_FLUSH); 325 if (retstr == NULL) 326 goto errorexit; 327 } 328 329 retstrsize = PyString_GET_SIZE(retstr); 330 REQUIRE_ENCODEBUFFER(buf, retstrsize); 331 332 memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize); 333 buf->outbuf += retstrsize; 334 335 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); 336 if (newpos < 0 && !PyErr_Occurred()) 337 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); 338 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { 339 PyErr_Clear(); 340 PyErr_Format(PyExc_IndexError, 341 "position %zd from error handler out of bounds", 342 newpos); 343 goto errorexit; 344 } 345 buf->inbuf = buf->inbuf_top + newpos; 346 347 Py_DECREF(retobj); 348 Py_DECREF(retstr); 349 return 0; 350 350 351 351 errorexit: 352 353 354 352 Py_XDECREF(retobj); 353 Py_XDECREF(retstr); 354 return -1; 355 355 } 356 356 357 357 static int 358 358 multibytecodec_decerror(MultibyteCodec *codec, 359 360 361 362 { 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 359 MultibyteCodec_State *state, 360 MultibyteDecodeBuffer *buf, 361 PyObject *errors, Py_ssize_t e) 362 { 363 PyObject *retobj = NULL, *retuni = NULL; 364 Py_ssize_t retunisize, newpos; 365 const char *reason; 366 Py_ssize_t esize, start, end; 367 368 if (e > 0) { 369 reason = "illegal multibyte sequence"; 370 esize = e; 371 } 372 else { 373 switch (e) { 374 case MBERR_TOOSMALL: 375 REQUIRE_DECODEBUFFER(buf, -1); 376 return 0; /* retry it */ 377 case MBERR_TOOFEW: 378 reason = "incomplete multibyte sequence"; 379 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 380 break; 381 case MBERR_INTERNAL: 382 PyErr_SetString(PyExc_RuntimeError, 383 "internal codec error"); 384 return -1; 385 default: 386 PyErr_SetString(PyExc_RuntimeError, 387 "unknown runtime error"); 388 return -1; 389 } 390 } 391 392 if (errors == ERROR_REPLACE) { 393 REQUIRE_DECODEBUFFER(buf, 1); 394 *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; 395 } 396 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { 397 buf->inbuf += esize; 398 return 0; 399 } 400 401 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); 402 end = start + esize; 403 404 /* use cached exception object if available */ 405 if (buf->excobj == NULL) { 406 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, 407 (const char *)buf->inbuf_top, 408 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top), 409 start, end, reason); 410 if (buf->excobj == NULL) 411 goto errorexit; 412 } 413 else 414 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || 415 PyUnicodeDecodeError_SetEnd(buf->excobj, end) || 416 PyUnicodeDecodeError_SetReason(buf->excobj, reason)) 417 goto errorexit; 418 419 if (errors == ERROR_STRICT) { 420 PyCodec_StrictErrors(buf->excobj); 421 goto errorexit; 422 } 423 424 retobj = call_error_callback(errors, buf->excobj); 425 if (retobj == NULL) 426 goto errorexit; 427 428 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || 429 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || 430 !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) || 431 PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) { 432 PyErr_SetString(PyExc_TypeError, 433 "decoding error handler must return " 434 "(unicode, int) tuple"); 435 goto errorexit; 436 } 437 438 retunisize = PyUnicode_GET_SIZE(retuni); 439 if (retunisize > 0) { 440 REQUIRE_DECODEBUFFER(buf, retunisize); 441 memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), 442 retunisize * Py_UNICODE_SIZE); 443 buf->outbuf += retunisize; 444 } 445 446 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); 447 if (newpos < 0 && !PyErr_Occurred()) 448 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); 449 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { 450 PyErr_Clear(); 451 PyErr_Format(PyExc_IndexError, 452 "position %zd from error handler out of bounds", 453 newpos); 454 goto errorexit; 455 } 456 buf->inbuf = buf->inbuf_top + newpos; 457 Py_DECREF(retobj); 458 return 0; 459 459 460 460 errorexit: 461 462 461 Py_XDECREF(retobj); 462 return -1; 463 463 } 464 464 465 465 static PyObject * 466 466 multibytecodec_encode(MultibyteCodec *codec, 467 468 469 470 { 471 472 473 474 if (datalen == 0)475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 *data = buf.inbuf; 502 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) 503 break; 504 else if (multibytecodec_encerror(codec, state, &buf, errors,r)) 505 goto errorexit; 506 else if (r == MBERR_TOOFEW) 507 break; 508 } 509 510 if (codec->encreset != NULL) 511 for (;;) { 512 Py_ssize_t outleft; 513 514 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 515 r = codec->encreset(state, codec->config, &buf.outbuf, 516 outleft); 517 if (r == 0) 518 break; 519 else if (multibytecodec_encerror(codec, state, 520 &buf, errors, r)) 521 goto errorexit; 522 } 523 524 finalsize = (Py_ssize_t)((char *)buf.outbuf - 525 PyString_AS_STRING(buf.outobj)); 526 527 if (finalsize != PyString_GET_SIZE(buf.outobj))528 if (_PyString_Resize(&buf.outobj, finalsize) == -1) 529 goto errorexit; 530 531 532 467 MultibyteCodec_State *state, 468 const Py_UNICODE **data, Py_ssize_t datalen, 469 PyObject *errors, int flags) 470 { 471 MultibyteEncodeBuffer buf; 472 Py_ssize_t finalsize, r = 0; 473 474 if (datalen == 0 && !(flags & MBENC_RESET)) 475 return PyString_FromString(""); 476 477 buf.excobj = NULL; 478 buf.inbuf = buf.inbuf_top = *data; 479 buf.inbuf_end = buf.inbuf_top + datalen; 480 481 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { 482 PyErr_NoMemory(); 483 goto errorexit; 484 } 485 486 buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); 487 if (buf.outobj == NULL) 488 goto errorexit; 489 buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj); 490 buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); 491 492 while (buf.inbuf < buf.inbuf_end) { 493 Py_ssize_t inleft, outleft; 494 495 /* we don't reuse inleft and outleft here. 496 * error callbacks can relocate the cursor anywhere on buffer*/ 497 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); 498 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 499 r = codec->encode(state, codec->config, &buf.inbuf, inleft, 500 &buf.outbuf, outleft, flags); 501 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) 502 break; 503 else if (multibytecodec_encerror(codec, state, &buf, errors,r)) 504 goto errorexit; 505 else if (r == MBERR_TOOFEW) 506 break; 507 } 508 509 if (codec->encreset != NULL && (flags & MBENC_RESET)) 510 for (;;) { 511 Py_ssize_t outleft; 512 513 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 514 r = codec->encreset(state, codec->config, &buf.outbuf, 515 outleft); 516 if (r == 0) 517 break; 518 else if (multibytecodec_encerror(codec, state, 519 &buf, errors, r)) 520 goto errorexit; 521 } 522 523 finalsize = (Py_ssize_t)((char *)buf.outbuf - 524 PyString_AS_STRING(buf.outobj)); 525 526 if (finalsize != PyString_GET_SIZE(buf.outobj)) 527 if (_PyString_Resize(&buf.outobj, finalsize) == -1) 528 goto errorexit; 529 530 *data = buf.inbuf; 531 Py_XDECREF(buf.excobj); 532 return buf.outobj; 533 533 534 534 errorexit: 535 536 537 535 Py_XDECREF(buf.excobj); 536 Py_XDECREF(buf.outobj); 537 return NULL; 538 538 } 539 539 540 540 static PyObject * 541 541 MultibyteCodec_Encode(MultibyteCodecObject *self, 542 543 { 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 542 PyObject *args, PyObject *kwargs) 543 { 544 MultibyteCodec_State state; 545 Py_UNICODE *data; 546 PyObject *errorcb, *r, *arg, *ucvt; 547 const char *errors = NULL; 548 Py_ssize_t datalen; 549 550 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode", 551 codeckwarglist, &arg, &errors)) 552 return NULL; 553 554 if (PyUnicode_Check(arg)) 555 ucvt = NULL; 556 else { 557 arg = ucvt = PyObject_Unicode(arg); 558 if (arg == NULL) 559 return NULL; 560 else if (!PyUnicode_Check(arg)) { 561 PyErr_SetString(PyExc_TypeError, 562 "couldn't convert the object to unicode."); 563 Py_DECREF(ucvt); 564 return NULL; 565 } 566 } 567 568 data = PyUnicode_AS_UNICODE(arg); 569 datalen = PyUnicode_GET_SIZE(arg); 570 571 errorcb = internal_error_callback(errors); 572 if (errorcb == NULL) { 573 Py_XDECREF(ucvt); 574 return NULL; 575 } 576 577 if (self->codec->encinit != NULL && 578 self->codec->encinit(&state, self->codec->config) != 0) 579 goto errorexit; 580 r = multibytecodec_encode(self->codec, &state, 581 (const Py_UNICODE **)&data, datalen, errorcb, 582 MBENC_FLUSH | MBENC_RESET); 583 if (r == NULL) 584 goto errorexit; 585 586 ERROR_DECREF(errorcb); 587 Py_XDECREF(ucvt); 588 return make_tuple(r, datalen); 589 589 590 590 errorexit: 591 592 593 591 ERROR_DECREF(errorcb); 592 Py_XDECREF(ucvt); 593 return NULL; 594 594 } 595 595 596 596 static PyObject * 597 597 MultibyteCodec_Decode(MultibyteCodecObject *self, 598 599 { 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 598 PyObject *args, PyObject *kwargs) 599 { 600 MultibyteCodec_State state; 601 MultibyteDecodeBuffer buf; 602 PyObject *errorcb; 603 Py_buffer pdata; 604 const char *data, *errors = NULL; 605 Py_ssize_t datalen, finalsize; 606 607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|z:decode", 608 codeckwarglist, &pdata, &errors)) 609 return NULL; 610 data = pdata.buf; 611 datalen = pdata.len; 612 613 errorcb = internal_error_callback(errors); 614 if (errorcb == NULL) { 615 PyBuffer_Release(&pdata); 616 return NULL; 617 } 618 619 if (datalen == 0) { 620 PyBuffer_Release(&pdata); 621 ERROR_DECREF(errorcb); 622 return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); 623 } 624 625 buf.excobj = NULL; 626 buf.inbuf = buf.inbuf_top = (unsigned char *)data; 627 buf.inbuf_end = buf.inbuf_top + datalen; 628 buf.outobj = PyUnicode_FromUnicode(NULL, datalen); 629 if (buf.outobj == NULL) 630 goto errorexit; 631 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); 632 buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); 633 634 if (self->codec->decinit != NULL && 635 self->codec->decinit(&state, self->codec->config) != 0) 636 goto errorexit; 637 638 while (buf.inbuf < buf.inbuf_end) { 639 Py_ssize_t inleft, outleft, r; 640 641 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); 642 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 643 644 r = self->codec->decode(&state, self->codec->config, 645 &buf.inbuf, inleft, &buf.outbuf, outleft); 646 if (r == 0) 647 break; 648 else if (multibytecodec_decerror(self->codec, &state, 649 &buf, errorcb, r)) 650 goto errorexit; 651 } 652 653 finalsize = (Py_ssize_t)(buf.outbuf - 654 PyUnicode_AS_UNICODE(buf.outobj)); 655 656 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 657 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 658 goto errorexit; 659 660 PyBuffer_Release(&pdata); 661 Py_XDECREF(buf.excobj); 662 ERROR_DECREF(errorcb); 663 return make_tuple(buf.outobj, datalen); 664 664 665 665 errorexit: 666 667 668 669 670 671 666 PyBuffer_Release(&pdata); 667 ERROR_DECREF(errorcb); 668 Py_XDECREF(buf.excobj); 669 Py_XDECREF(buf.outobj); 670 671 return NULL; 672 672 } 673 673 674 674 static struct PyMethodDef multibytecodec_methods[] = { 675 {"encode",(PyCFunction)MultibyteCodec_Encode,676 677 678 {"decode",(PyCFunction)MultibyteCodec_Decode,679 680 681 {NULL,NULL},675 {"encode", (PyCFunction)MultibyteCodec_Encode, 676 METH_VARARGS | METH_KEYWORDS, 677 MultibyteCodec_Encode__doc__}, 678 {"decode", (PyCFunction)MultibyteCodec_Decode, 679 METH_VARARGS | METH_KEYWORDS, 680 MultibyteCodec_Decode__doc__}, 681 {NULL, NULL}, 682 682 }; 683 683 … … 685 685 multibytecodec_dealloc(MultibyteCodecObject *self) 686 686 { 687 687 PyObject_Del(self); 688 688 } 689 689 690 690 static PyTypeObject MultibyteCodec_Type = { 691 692 "MultibyteCodec",/* tp_name */693 sizeof(MultibyteCodecObject),/* tp_basicsize */694 0,/* tp_itemsize */695 696 697 0,/* tp_print */698 0,/* tp_getattr */699 0,/* tp_setattr */700 0,/* tp_compare */701 0,/* tp_repr */702 0,/* tp_as_number */703 0,/* tp_as_sequence */704 0,/* tp_as_mapping */705 0,/* tp_hash */706 0,/* tp_call */707 0,/* tp_str */708 PyObject_GenericGetAttr,/* tp_getattro */709 0,/* tp_setattro */710 0,/* tp_as_buffer */711 Py_TPFLAGS_DEFAULT,/* tp_flags */712 0,/* tp_doc */713 0,/* tp_traverse */714 0,/* tp_clear */715 0,/* tp_richcompare */716 0,/* tp_weaklistoffset */717 0,/* tp_iter */718 0,/* tp_iterext */719 multibytecodec_methods,/* tp_methods */691 PyVarObject_HEAD_INIT(NULL, 0) 692 "MultibyteCodec", /* tp_name */ 693 sizeof(MultibyteCodecObject), /* tp_basicsize */ 694 0, /* tp_itemsize */ 695 /* methods */ 696 (destructor)multibytecodec_dealloc, /* tp_dealloc */ 697 0, /* tp_print */ 698 0, /* tp_getattr */ 699 0, /* tp_setattr */ 700 0, /* tp_compare */ 701 0, /* tp_repr */ 702 0, /* tp_as_number */ 703 0, /* tp_as_sequence */ 704 0, /* tp_as_mapping */ 705 0, /* tp_hash */ 706 0, /* tp_call */ 707 0, /* tp_str */ 708 PyObject_GenericGetAttr, /* tp_getattro */ 709 0, /* tp_setattro */ 710 0, /* tp_as_buffer */ 711 Py_TPFLAGS_DEFAULT, /* tp_flags */ 712 0, /* tp_doc */ 713 0, /* tp_traverse */ 714 0, /* tp_clear */ 715 0, /* tp_richcompare */ 716 0, /* tp_weaklistoffset */ 717 0, /* tp_iter */ 718 0, /* tp_iterext */ 719 multibytecodec_methods, /* tp_methods */ 720 720 }; 721 721 … … 725 725 */ 726 726 727 #define STATEFUL_DCTX(o) 728 #define STATEFUL_ECTX(o) 727 #define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o)) 728 #define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o)) 729 729 730 730 static PyObject * 731 731 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, 732 733 { 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 (const Py_UNICODE **)&inbuf,780 datalen, ctx->errors, final ? MBENC_FLUSH: 0);781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 732 PyObject *unistr, int final) 733 { 734 PyObject *ucvt, *r = NULL; 735 Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; 736 Py_ssize_t datalen, origpending; 737 738 if (PyUnicode_Check(unistr)) 739 ucvt = NULL; 740 else { 741 unistr = ucvt = PyObject_Unicode(unistr); 742 if (unistr == NULL) 743 return NULL; 744 else if (!PyUnicode_Check(unistr)) { 745 PyErr_SetString(PyExc_TypeError, 746 "couldn't convert the object to unicode."); 747 Py_DECREF(ucvt); 748 return NULL; 749 } 750 } 751 752 datalen = PyUnicode_GET_SIZE(unistr); 753 origpending = ctx->pendingsize; 754 755 if (origpending > 0) { 756 if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { 757 PyErr_NoMemory(); 758 /* inbuf_tmp == NULL */ 759 goto errorexit; 760 } 761 inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); 762 if (inbuf_tmp == NULL) 763 goto errorexit; 764 memcpy(inbuf_tmp, ctx->pending, 765 Py_UNICODE_SIZE * ctx->pendingsize); 766 memcpy(inbuf_tmp + ctx->pendingsize, 767 PyUnicode_AS_UNICODE(unistr), 768 Py_UNICODE_SIZE * datalen); 769 datalen += ctx->pendingsize; 770 ctx->pendingsize = 0; 771 inbuf = inbuf_tmp; 772 } 773 else 774 inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); 775 776 inbuf_end = inbuf + datalen; 777 778 r = multibytecodec_encode(ctx->codec, &ctx->state, 779 (const Py_UNICODE **)&inbuf, datalen, 780 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0); 781 if (r == NULL) { 782 /* recover the original pending buffer */ 783 if (origpending > 0) 784 memcpy(ctx->pending, inbuf_tmp, 785 Py_UNICODE_SIZE * origpending); 786 ctx->pendingsize = origpending; 787 goto errorexit; 788 } 789 790 if (inbuf < inbuf_end) { 791 ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); 792 if (ctx->pendingsize > MAXENCPENDING) { 793 /* normal codecs can't reach here */ 794 ctx->pendingsize = 0; 795 PyErr_SetString(PyExc_UnicodeError, 796 "pending buffer overflow"); 797 goto errorexit; 798 } 799 memcpy(ctx->pending, inbuf, 800 ctx->pendingsize * Py_UNICODE_SIZE); 801 } 802 803 if (inbuf_tmp != NULL) 804 PyMem_Del(inbuf_tmp); 805 Py_XDECREF(ucvt); 806 return r; 807 807 808 808 errorexit: 809 810 811 812 813 809 if (inbuf_tmp != NULL) 810 PyMem_Del(inbuf_tmp); 811 Py_XDECREF(r); 812 Py_XDECREF(ucvt); 813 return NULL; 814 814 } 815 815 816 816 static int 817 817 decoder_append_pending(MultibyteStatefulDecoderContext *ctx, 818 819 { 820 821 822 823 824 825 826 827 828 829 830 818 MultibyteDecodeBuffer *buf) 819 { 820 Py_ssize_t npendings; 821 822 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 823 if (npendings + ctx->pendingsize > MAXDECPENDING || 824 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { 825 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); 826 return -1; 827 } 828 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); 829 ctx->pendingsize += npendings; 830 return 0; 831 831 } 832 832 833 833 static int 834 834 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, 835 836 { 837 838 839 840 841 842 843 844 845 846 847 848 835 Py_ssize_t size) 836 { 837 buf->inbuf = buf->inbuf_top = (const unsigned char *)data; 838 buf->inbuf_end = buf->inbuf_top + size; 839 if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ 840 buf->outobj = PyUnicode_FromUnicode(NULL, size); 841 if (buf->outobj == NULL) 842 return -1; 843 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); 844 buf->outbuf_end = buf->outbuf + 845 PyUnicode_GET_SIZE(buf->outobj); 846 } 847 848 return 0; 849 849 } 850 850 851 851 static int 852 852 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, 853 854 { 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 853 MultibyteDecodeBuffer *buf) 854 { 855 while (buf->inbuf < buf->inbuf_end) { 856 Py_ssize_t inleft, outleft; 857 Py_ssize_t r; 858 859 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 860 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); 861 862 r = ctx->codec->decode(&ctx->state, ctx->codec->config, 863 &buf->inbuf, inleft, &buf->outbuf, outleft); 864 if (r == 0 || r == MBERR_TOOFEW) 865 break; 866 else if (multibytecodec_decerror(ctx->codec, &ctx->state, 867 buf, ctx->errors, r)) 868 return -1; 869 } 870 return 0; 871 871 } 872 872 … … 878 878 static PyObject * 879 879 mbiencoder_encode(MultibyteIncrementalEncoderObject *self, 880 881 { 882 883 884 885 886 887 888 889 880 PyObject *args, PyObject *kwargs) 881 { 882 PyObject *data; 883 int final = 0; 884 885 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode", 886 incrementalkwarglist, &data, &final)) 887 return NULL; 888 889 return encoder_encode_stateful(STATEFUL_ECTX(self), data, final); 890 890 } 891 891 … … 893 893 mbiencoder_reset(MultibyteIncrementalEncoderObject *self) 894 894 { 895 896 897 898 899 900 895 if (self->codec->decreset != NULL && 896 self->codec->decreset(&self->state, self->codec->config) != 0) 897 return NULL; 898 self->pendingsize = 0; 899 900 Py_RETURN_NONE; 901 901 } 902 902 903 903 static struct PyMethodDef mbiencoder_methods[] = { 904 {"encode",(PyCFunction)mbiencoder_encode,905 906 {"reset",(PyCFunction)mbiencoder_reset,907 908 {NULL,NULL},904 {"encode", (PyCFunction)mbiencoder_encode, 905 METH_VARARGS | METH_KEYWORDS, NULL}, 906 {"reset", (PyCFunction)mbiencoder_reset, 907 METH_NOARGS, NULL}, 908 {NULL, NULL}, 909 909 }; 910 910 … … 912 912 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 913 913 { 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 914 MultibyteIncrementalEncoderObject *self; 915 PyObject *codec = NULL; 916 char *errors = NULL; 917 918 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", 919 incnewkwarglist, &errors)) 920 return NULL; 921 922 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); 923 if (self == NULL) 924 return NULL; 925 926 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 927 if (codec == NULL) 928 goto errorexit; 929 if (!MultibyteCodec_Check(codec)) { 930 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 931 goto errorexit; 932 } 933 934 self->codec = ((MultibyteCodecObject *)codec)->codec; 935 self->pendingsize = 0; 936 self->errors = internal_error_callback(errors); 937 if (self->errors == NULL) 938 goto errorexit; 939 if (self->codec->encinit != NULL && 940 self->codec->encinit(&self->state, self->codec->config) != 0) 941 goto errorexit; 942 943 Py_DECREF(codec); 944 return (PyObject *)self; 945 945 946 946 errorexit: 947 948 949 947 Py_XDECREF(self); 948 Py_XDECREF(codec); 949 return NULL; 950 950 } 951 951 … … 953 953 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds) 954 954 { 955 955 return 0; 956 956 } 957 957 958 958 static int 959 959 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self, 960 961 { 962 963 964 960 visitproc visit, void *arg) 961 { 962 if (ERROR_ISCUSTOM(self->errors)) 963 Py_VISIT(self->errors); 964 return 0; 965 965 } 966 966 … … 968 968 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) 969 969 { 970 971 972 970 PyObject_GC_UnTrack(self); 971 ERROR_DECREF(self->errors); 972 Py_TYPE(self)->tp_free(self); 973 973 } 974 974 975 975 static PyTypeObject MultibyteIncrementalEncoder_Type = { 976 977 "MultibyteIncrementalEncoder",/* tp_name */978 979 0,/* tp_itemsize */980 981 982 0,/* tp_print */983 0,/* tp_getattr */984 0,/* tp_setattr */985 0,/* tp_compare */986 0,/* tp_repr */987 0,/* tp_as_number */988 0,/* tp_as_sequence */989 0,/* tp_as_mapping */990 0,/* tp_hash */991 0,/* tp_call */992 0,/* tp_str */993 PyObject_GenericGetAttr,/* tp_getattro */994 0,/* tp_setattro */995 0,/* tp_as_buffer */996 997 | Py_TPFLAGS_BASETYPE,/* tp_flags */998 0,/* tp_doc */999 (traverseproc)mbiencoder_traverse,/* tp_traverse */1000 0,/* tp_clear */1001 0,/* tp_richcompare */1002 0,/* tp_weaklistoffset */1003 0,/* tp_iter */1004 0,/* tp_iterext */1005 mbiencoder_methods,/* tp_methods */1006 0,/* tp_members */1007 codecctx_getsets,/* tp_getset */1008 0,/* tp_base */1009 0,/* tp_dict */1010 0,/* tp_descr_get */1011 0,/* tp_descr_set */1012 0,/* tp_dictoffset */1013 mbiencoder_init,/* tp_init */1014 0,/* tp_alloc */1015 mbiencoder_new,/* tp_new */976 PyVarObject_HEAD_INIT(NULL, 0) 977 "MultibyteIncrementalEncoder", /* tp_name */ 978 sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ 979 0, /* tp_itemsize */ 980 /* methods */ 981 (destructor)mbiencoder_dealloc, /* tp_dealloc */ 982 0, /* tp_print */ 983 0, /* tp_getattr */ 984 0, /* tp_setattr */ 985 0, /* tp_compare */ 986 0, /* tp_repr */ 987 0, /* tp_as_number */ 988 0, /* tp_as_sequence */ 989 0, /* tp_as_mapping */ 990 0, /* tp_hash */ 991 0, /* tp_call */ 992 0, /* tp_str */ 993 PyObject_GenericGetAttr, /* tp_getattro */ 994 0, /* tp_setattro */ 995 0, /* tp_as_buffer */ 996 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 997 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 998 0, /* tp_doc */ 999 (traverseproc)mbiencoder_traverse, /* tp_traverse */ 1000 0, /* tp_clear */ 1001 0, /* tp_richcompare */ 1002 0, /* tp_weaklistoffset */ 1003 0, /* tp_iter */ 1004 0, /* tp_iterext */ 1005 mbiencoder_methods, /* tp_methods */ 1006 0, /* tp_members */ 1007 codecctx_getsets, /* tp_getset */ 1008 0, /* tp_base */ 1009 0, /* tp_dict */ 1010 0, /* tp_descr_get */ 1011 0, /* tp_descr_set */ 1012 0, /* tp_dictoffset */ 1013 mbiencoder_init, /* tp_init */ 1014 0, /* tp_alloc */ 1015 mbiencoder_new, /* tp_new */ 1016 1016 }; 1017 1017 … … 1023 1023 static PyObject * 1024 1024 mbidecoder_decode(MultibyteIncrementalDecoderObject *self, 1025 1026 { 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1025 PyObject *args, PyObject *kwargs) 1026 { 1027 MultibyteDecodeBuffer buf; 1028 char *data, *wdata = NULL; 1029 Py_buffer pdata; 1030 Py_ssize_t wsize, finalsize = 0, size, origpending; 1031 int final = 0; 1032 1033 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i:decode", 1034 incrementalkwarglist, &pdata, &final)) 1035 return NULL; 1036 data = pdata.buf; 1037 size = pdata.len; 1038 1039 buf.outobj = buf.excobj = NULL; 1040 origpending = self->pendingsize; 1041 1042 if (self->pendingsize == 0) { 1043 wsize = size; 1044 wdata = data; 1045 } 1046 else { 1047 if (size > PY_SSIZE_T_MAX - self->pendingsize) { 1048 PyErr_NoMemory(); 1049 goto errorexit; 1050 } 1051 wsize = size + self->pendingsize; 1052 wdata = PyMem_Malloc(wsize); 1053 if (wdata == NULL) 1054 goto errorexit; 1055 memcpy(wdata, self->pending, self->pendingsize); 1056 memcpy(wdata + self->pendingsize, data, size); 1057 self->pendingsize = 0; 1058 } 1059 1060 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) 1061 goto errorexit; 1062 1063 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) 1064 goto errorexit; 1065 1066 if (final && buf.inbuf < buf.inbuf_end) { 1067 if (multibytecodec_decerror(self->codec, &self->state, 1068 &buf, self->errors, MBERR_TOOFEW)) { 1069 /* recover the original pending buffer */ 1070 memcpy(self->pending, wdata, origpending); 1071 self->pendingsize = origpending; 1072 goto errorexit; 1073 } 1074 } 1075 1076 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ 1077 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) 1078 goto errorexit; 1079 } 1080 1081 finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); 1082 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 1083 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 1084 goto errorexit; 1085 1086 PyBuffer_Release(&pdata); 1087 if (wdata != data) 1088 PyMem_Del(wdata); 1089 Py_XDECREF(buf.excobj); 1090 return buf.outobj; 1091 1091 1092 1092 errorexit: 1093 1094 1095 1096 1097 1098 1093 PyBuffer_Release(&pdata); 1094 if (wdata != NULL && wdata != data) 1095 PyMem_Del(wdata); 1096 Py_XDECREF(buf.excobj); 1097 Py_XDECREF(buf.outobj); 1098 return NULL; 1099 1099 } 1100 1100 … … 1102 1102 mbidecoder_reset(MultibyteIncrementalDecoderObject *self) 1103 1103 { 1104 1105 1106 1107 1108 1109 1104 if (self->codec->decreset != NULL && 1105 self->codec->decreset(&self->state, self->codec->config) != 0) 1106 return NULL; 1107 self->pendingsize = 0; 1108 1109 Py_RETURN_NONE; 1110 1110 } 1111 1111 1112 1112 static struct PyMethodDef mbidecoder_methods[] = { 1113 {"decode",(PyCFunction)mbidecoder_decode,1114 1115 {"reset",(PyCFunction)mbidecoder_reset,1116 1117 {NULL,NULL},1113 {"decode", (PyCFunction)mbidecoder_decode, 1114 METH_VARARGS | METH_KEYWORDS, NULL}, 1115 {"reset", (PyCFunction)mbidecoder_reset, 1116 METH_NOARGS, NULL}, 1117 {NULL, NULL}, 1118 1118 }; 1119 1119 … … 1121 1121 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1122 1122 { 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1123 MultibyteIncrementalDecoderObject *self; 1124 PyObject *codec = NULL; 1125 char *errors = NULL; 1126 1127 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", 1128 incnewkwarglist, &errors)) 1129 return NULL; 1130 1131 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); 1132 if (self == NULL) 1133 return NULL; 1134 1135 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1136 if (codec == NULL) 1137 goto errorexit; 1138 if (!MultibyteCodec_Check(codec)) { 1139 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1140 goto errorexit; 1141 } 1142 1143 self->codec = ((MultibyteCodecObject *)codec)->codec; 1144 self->pendingsize = 0; 1145 self->errors = internal_error_callback(errors); 1146 if (self->errors == NULL) 1147 goto errorexit; 1148 if (self->codec->decinit != NULL && 1149 self->codec->decinit(&self->state, self->codec->config) != 0) 1150 goto errorexit; 1151 1152 Py_DECREF(codec); 1153 return (PyObject *)self; 1154 1154 1155 1155 errorexit: 1156 1157 1158 1156 Py_XDECREF(self); 1157 Py_XDECREF(codec); 1158 return NULL; 1159 1159 } 1160 1160 … … 1162 1162 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds) 1163 1163 { 1164 1164 return 0; 1165 1165 } 1166 1166 1167 1167 static int 1168 1168 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self, 1169 1170 { 1171 1172 1173 1169 visitproc visit, void *arg) 1170 { 1171 if (ERROR_ISCUSTOM(self->errors)) 1172 Py_VISIT(self->errors); 1173 return 0; 1174 1174 } 1175 1175 … … 1177 1177 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self) 1178 1178 { 1179 1180 1181 1179 PyObject_GC_UnTrack(self); 1180 ERROR_DECREF(self->errors); 1181 Py_TYPE(self)->tp_free(self); 1182 1182 } 1183 1183 1184 1184 static PyTypeObject MultibyteIncrementalDecoder_Type = { 1185 1186 "MultibyteIncrementalDecoder",/* tp_name */1187 1188 0,/* tp_itemsize */1189 1190 1191 0,/* tp_print */1192 0,/* tp_getattr */1193 0,/* tp_setattr */1194 0,/* tp_compare */1195 0,/* tp_repr */1196 0,/* tp_as_number */1197 0,/* tp_as_sequence */1198 0,/* tp_as_mapping */1199 0,/* tp_hash */1200 0,/* tp_call */1201 0,/* tp_str */1202 PyObject_GenericGetAttr,/* tp_getattro */1203 0,/* tp_setattro */1204 0,/* tp_as_buffer */1205 1206 | Py_TPFLAGS_BASETYPE,/* tp_flags */1207 0,/* tp_doc */1208 (traverseproc)mbidecoder_traverse,/* tp_traverse */1209 0,/* tp_clear */1210 0,/* tp_richcompare */1211 0,/* tp_weaklistoffset */1212 0,/* tp_iter */1213 0,/* tp_iterext */1214 mbidecoder_methods,/* tp_methods */1215 0,/* tp_members */1216 codecctx_getsets,/* tp_getset */1217 0,/* tp_base */1218 0,/* tp_dict */1219 0,/* tp_descr_get */1220 0,/* tp_descr_set */1221 0,/* tp_dictoffset */1222 mbidecoder_init,/* tp_init */1223 0,/* tp_alloc */1224 mbidecoder_new,/* tp_new */1185 PyVarObject_HEAD_INIT(NULL, 0) 1186 "MultibyteIncrementalDecoder", /* tp_name */ 1187 sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ 1188 0, /* tp_itemsize */ 1189 /* methods */ 1190 (destructor)mbidecoder_dealloc, /* tp_dealloc */ 1191 0, /* tp_print */ 1192 0, /* tp_getattr */ 1193 0, /* tp_setattr */ 1194 0, /* tp_compare */ 1195 0, /* tp_repr */ 1196 0, /* tp_as_number */ 1197 0, /* tp_as_sequence */ 1198 0, /* tp_as_mapping */ 1199 0, /* tp_hash */ 1200 0, /* tp_call */ 1201 0, /* tp_str */ 1202 PyObject_GenericGetAttr, /* tp_getattro */ 1203 0, /* tp_setattro */ 1204 0, /* tp_as_buffer */ 1205 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1206 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1207 0, /* tp_doc */ 1208 (traverseproc)mbidecoder_traverse, /* tp_traverse */ 1209 0, /* tp_clear */ 1210 0, /* tp_richcompare */ 1211 0, /* tp_weaklistoffset */ 1212 0, /* tp_iter */ 1213 0, /* tp_iterext */ 1214 mbidecoder_methods, /* tp_methods */ 1215 0, /* tp_members */ 1216 codecctx_getsets, /* tp_getset */ 1217 0, /* tp_base */ 1218 0, /* tp_dict */ 1219 0, /* tp_descr_get */ 1220 0, /* tp_descr_set */ 1221 0, /* tp_dictoffset */ 1222 mbidecoder_init, /* tp_init */ 1223 0, /* tp_alloc */ 1224 mbidecoder_new, /* tp_new */ 1225 1225 }; 1226 1226 … … 1232 1232 static PyObject * 1233 1233 mbstreamreader_iread(MultibyteStreamReaderObject *self, 1234 1235 { 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1234 const char *method, Py_ssize_t sizehint) 1235 { 1236 MultibyteDecodeBuffer buf; 1237 PyObject *cres; 1238 Py_ssize_t rsize, finalsize = 0; 1239 1240 if (sizehint == 0) 1241 return PyUnicode_FromUnicode(NULL, 0); 1242 1243 buf.outobj = buf.excobj = NULL; 1244 cres = NULL; 1245 1246 for (;;) { 1247 int endoffile; 1248 1249 if (sizehint < 0) 1250 cres = PyObject_CallMethod(self->stream, 1251 (char *)method, NULL); 1252 else 1253 cres = PyObject_CallMethod(self->stream, 1254 (char *)method, "i", sizehint); 1255 if (cres == NULL) 1256 goto errorexit; 1257 1258 if (!PyString_Check(cres)) { 1259 PyErr_SetString(PyExc_TypeError, 1260 "stream function returned a " 1261 "non-string object"); 1262 goto errorexit; 1263 } 1264 1265 endoffile = (PyString_GET_SIZE(cres) == 0); 1266 1267 if (self->pendingsize > 0) { 1268 PyObject *ctr; 1269 char *ctrdata; 1270 1271 if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { 1272 PyErr_NoMemory(); 1273 goto errorexit; 1274 } 1275 rsize = PyString_GET_SIZE(cres) + self->pendingsize; 1276 ctr = PyString_FromStringAndSize(NULL, rsize); 1277 if (ctr == NULL) 1278 goto errorexit; 1279 ctrdata = PyString_AS_STRING(ctr); 1280 memcpy(ctrdata, self->pending, self->pendingsize); 1281 memcpy(ctrdata + self->pendingsize, 1282 PyString_AS_STRING(cres), 1283 PyString_GET_SIZE(cres)); 1284 Py_DECREF(cres); 1285 cres = ctr; 1286 self->pendingsize = 0; 1287 } 1288 1289 rsize = PyString_GET_SIZE(cres); 1290 if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres), 1291 rsize) != 0) 1292 goto errorexit; 1293 1294 if (rsize > 0 && decoder_feed_buffer( 1295 (MultibyteStatefulDecoderContext *)self, &buf)) 1296 goto errorexit; 1297 1298 if (endoffile || sizehint < 0) { 1299 if (buf.inbuf < buf.inbuf_end && 1300 multibytecodec_decerror(self->codec, &self->state, 1301 &buf, self->errors, MBERR_TOOFEW)) 1302 goto errorexit; 1303 } 1304 1305 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ 1306 if (decoder_append_pending(STATEFUL_DCTX(self), 1307 &buf) != 0) 1308 goto errorexit; 1309 } 1310 1311 finalsize = (Py_ssize_t)(buf.outbuf - 1312 PyUnicode_AS_UNICODE(buf.outobj)); 1313 Py_DECREF(cres); 1314 cres = NULL; 1315 1316 if (sizehint < 0 || finalsize != 0 || rsize == 0) 1317 break; 1318 1319 sizehint = 1; /* read 1 more byte and retry */ 1320 } 1321 1322 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 1323 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 1324 goto errorexit; 1325 1326 Py_XDECREF(cres); 1327 Py_XDECREF(buf.excobj); 1328 return buf.outobj; 1329 1329 1330 1330 errorexit: 1331 1332 1333 1334 1331 Py_XDECREF(cres); 1332 Py_XDECREF(buf.excobj); 1333 Py_XDECREF(buf.outobj); 1334 return NULL; 1335 1335 } 1336 1336 … … 1338 1338 mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) 1339 1339 { 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1340 PyObject *sizeobj = NULL; 1341 Py_ssize_t size; 1342 1343 if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj)) 1344 return NULL; 1345 1346 if (sizeobj == Py_None || sizeobj == NULL) 1347 size = -1; 1348 else if (PyInt_Check(sizeobj)) 1349 size = PyInt_AsSsize_t(sizeobj); 1350 else { 1351 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1352 return NULL; 1353 } 1354 1355 return mbstreamreader_iread(self, "read", size); 1356 1356 } 1357 1357 … … 1359 1359 mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) 1360 1360 { 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1361 PyObject *sizeobj = NULL; 1362 Py_ssize_t size; 1363 1364 if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj)) 1365 return NULL; 1366 1367 if (sizeobj == Py_None || sizeobj == NULL) 1368 size = -1; 1369 else if (PyInt_Check(sizeobj)) 1370 size = PyInt_AsSsize_t(sizeobj); 1371 else { 1372 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1373 return NULL; 1374 } 1375 1376 return mbstreamreader_iread(self, "readline", size); 1377 1377 } 1378 1378 … … 1380 1380 mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) 1381 1381 { 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1382 PyObject *sizehintobj = NULL, *r, *sr; 1383 Py_ssize_t sizehint; 1384 1385 if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj)) 1386 return NULL; 1387 1388 if (sizehintobj == Py_None || sizehintobj == NULL) 1389 sizehint = -1; 1390 else if (PyInt_Check(sizehintobj)) 1391 sizehint = PyInt_AsSsize_t(sizehintobj); 1392 else { 1393 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1394 return NULL; 1395 } 1396 1397 r = mbstreamreader_iread(self, "read", sizehint); 1398 if (r == NULL) 1399 return NULL; 1400 1401 sr = PyUnicode_Splitlines(r, 1); 1402 Py_DECREF(r); 1403 return sr; 1404 1404 } 1405 1405 … … 1407 1407 mbstreamreader_reset(MultibyteStreamReaderObject *self) 1408 1408 { 1409 1410 1411 1412 1413 1414 1409 if (self->codec->decreset != NULL && 1410 self->codec->decreset(&self->state, self->codec->config) != 0) 1411 return NULL; 1412 self->pendingsize = 0; 1413 1414 Py_RETURN_NONE; 1415 1415 } 1416 1416 1417 1417 static struct PyMethodDef mbstreamreader_methods[] = { 1418 {"read",(PyCFunction)mbstreamreader_read,1419 1420 {"readline",(PyCFunction)mbstreamreader_readline,1421 1422 {"readlines",(PyCFunction)mbstreamreader_readlines,1423 1424 {"reset",(PyCFunction)mbstreamreader_reset,1425 1426 {NULL,NULL},1418 {"read", (PyCFunction)mbstreamreader_read, 1419 METH_VARARGS, NULL}, 1420 {"readline", (PyCFunction)mbstreamreader_readline, 1421 METH_VARARGS, NULL}, 1422 {"readlines", (PyCFunction)mbstreamreader_readlines, 1423 METH_VARARGS, NULL}, 1424 {"reset", (PyCFunction)mbstreamreader_reset, 1425 METH_NOARGS, NULL}, 1426 {NULL, NULL}, 1427 1427 }; 1428 1428 1429 1429 static PyMemberDef mbstreamreader_members[] = { 1430 {"stream",T_OBJECT,1431 1432 1433 1430 {"stream", T_OBJECT, 1431 offsetof(MultibyteStreamReaderObject, stream), 1432 READONLY, NULL}, 1433 {NULL,} 1434 1434 }; 1435 1435 … … 1437 1437 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1438 1438 { 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1439 MultibyteStreamReaderObject *self; 1440 PyObject *stream, *codec = NULL; 1441 char *errors = NULL; 1442 1443 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", 1444 streamkwarglist, &stream, &errors)) 1445 return NULL; 1446 1447 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); 1448 if (self == NULL) 1449 return NULL; 1450 1451 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1452 if (codec == NULL) 1453 goto errorexit; 1454 if (!MultibyteCodec_Check(codec)) { 1455 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1456 goto errorexit; 1457 } 1458 1459 self->codec = ((MultibyteCodecObject *)codec)->codec; 1460 self->stream = stream; 1461 Py_INCREF(stream); 1462 self->pendingsize = 0; 1463 self->errors = internal_error_callback(errors); 1464 if (self->errors == NULL) 1465 goto errorexit; 1466 if (self->codec->decinit != NULL && 1467 self->codec->decinit(&self->state, self->codec->config) != 0) 1468 goto errorexit; 1469 1470 Py_DECREF(codec); 1471 return (PyObject *)self; 1472 1472 1473 1473 errorexit: 1474 1475 1476 1474 Py_XDECREF(self); 1475 Py_XDECREF(codec); 1476 return NULL; 1477 1477 } 1478 1478 … … 1480 1480 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds) 1481 1481 { 1482 1482 return 0; 1483 1483 } 1484 1484 1485 1485 static int 1486 1486 mbstreamreader_traverse(MultibyteStreamReaderObject *self, 1487 1488 { 1489 1490 1491 1492 1487 visitproc visit, void *arg) 1488 { 1489 if (ERROR_ISCUSTOM(self->errors)) 1490 Py_VISIT(self->errors); 1491 Py_VISIT(self->stream); 1492 return 0; 1493 1493 } 1494 1494 … … 1496 1496 mbstreamreader_dealloc(MultibyteStreamReaderObject *self) 1497 1497 { 1498 1499 1500 1501 1498 PyObject_GC_UnTrack(self); 1499 ERROR_DECREF(self->errors); 1500 Py_XDECREF(self->stream); 1501 Py_TYPE(self)->tp_free(self); 1502 1502 } 1503 1503 1504 1504 static PyTypeObject MultibyteStreamReader_Type = { 1505 1506 "MultibyteStreamReader",/* tp_name */1507 1508 0,/* tp_itemsize */1509 1510 1511 0,/* tp_print */1512 0,/* tp_getattr */1513 0,/* tp_setattr */1514 0,/* tp_compare */1515 0,/* tp_repr */1516 0,/* tp_as_number */1517 0,/* tp_as_sequence */1518 0,/* tp_as_mapping */1519 0,/* tp_hash */1520 0,/* tp_call */1521 0,/* tp_str */1522 PyObject_GenericGetAttr,/* tp_getattro */1523 0,/* tp_setattro */1524 0,/* tp_as_buffer */1525 1526 | Py_TPFLAGS_BASETYPE,/* tp_flags */1527 0,/* tp_doc */1528 (traverseproc)mbstreamreader_traverse,/* tp_traverse */1529 0,/* tp_clear */1530 0,/* tp_richcompare */1531 0,/* tp_weaklistoffset */1532 0,/* tp_iter */1533 0,/* tp_iterext */1534 mbstreamreader_methods,/* tp_methods */1535 mbstreamreader_members,/* tp_members */1536 codecctx_getsets,/* tp_getset */1537 0,/* tp_base */1538 0,/* tp_dict */1539 0,/* tp_descr_get */1540 0,/* tp_descr_set */1541 0,/* tp_dictoffset */1542 mbstreamreader_init,/* tp_init */1543 0,/* tp_alloc */1544 mbstreamreader_new,/* tp_new */1505 PyVarObject_HEAD_INIT(NULL, 0) 1506 "MultibyteStreamReader", /* tp_name */ 1507 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ 1508 0, /* tp_itemsize */ 1509 /* methods */ 1510 (destructor)mbstreamreader_dealloc, /* tp_dealloc */ 1511 0, /* tp_print */ 1512 0, /* tp_getattr */ 1513 0, /* tp_setattr */ 1514 0, /* tp_compare */ 1515 0, /* tp_repr */ 1516 0, /* tp_as_number */ 1517 0, /* tp_as_sequence */ 1518 0, /* tp_as_mapping */ 1519 0, /* tp_hash */ 1520 0, /* tp_call */ 1521 0, /* tp_str */ 1522 PyObject_GenericGetAttr, /* tp_getattro */ 1523 0, /* tp_setattro */ 1524 0, /* tp_as_buffer */ 1525 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1526 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1527 0, /* tp_doc */ 1528 (traverseproc)mbstreamreader_traverse, /* tp_traverse */ 1529 0, /* tp_clear */ 1530 0, /* tp_richcompare */ 1531 0, /* tp_weaklistoffset */ 1532 0, /* tp_iter */ 1533 0, /* tp_iterext */ 1534 mbstreamreader_methods, /* tp_methods */ 1535 mbstreamreader_members, /* tp_members */ 1536 codecctx_getsets, /* tp_getset */ 1537 0, /* tp_base */ 1538 0, /* tp_dict */ 1539 0, /* tp_descr_get */ 1540 0, /* tp_descr_set */ 1541 0, /* tp_dictoffset */ 1542 mbstreamreader_init, /* tp_init */ 1543 0, /* tp_alloc */ 1544 mbstreamreader_new, /* tp_new */ 1545 1545 }; 1546 1546 … … 1552 1552 static int 1553 1553 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, 1554 1555 { 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1554 PyObject *unistr) 1555 { 1556 PyObject *str, *wr; 1557 1558 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); 1559 if (str == NULL) 1560 return -1; 1561 1562 wr = PyObject_CallMethod(self->stream, "write", "O", str); 1563 Py_DECREF(str); 1564 if (wr == NULL) 1565 return -1; 1566 1567 Py_DECREF(wr); 1568 return 0; 1569 1569 } 1570 1570 … … 1572 1572 mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj) 1573 1573 { 1574 1575 1576 1577 1574 if (mbstreamwriter_iwrite(self, strobj)) 1575 return NULL; 1576 else 1577 Py_RETURN_NONE; 1578 1578 } 1579 1579 … … 1581 1581 mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines) 1582 1582 { 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1583 PyObject *strobj; 1584 int i, r; 1585 1586 if (!PySequence_Check(lines)) { 1587 PyErr_SetString(PyExc_TypeError, 1588 "arg must be a sequence object"); 1589 return NULL; 1590 } 1591 1592 for (i = 0; i < PySequence_Length(lines); i++) { 1593 /* length can be changed even within this loop */ 1594 strobj = PySequence_GetItem(lines, i); 1595 if (strobj == NULL) 1596 return NULL; 1597 1598 r = mbstreamwriter_iwrite(self, strobj); 1599 Py_DECREF(strobj); 1600 if (r == -1) 1601 return NULL; 1602 } 1603 1604 Py_RETURN_NONE; 1605 1605 } 1606 1606 … … 1608 1608 mbstreamwriter_reset(MultibyteStreamWriterObject *self) 1609 1609 { 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1610 const Py_UNICODE *pending; 1611 PyObject *pwrt; 1612 1613 pending = self->pending; 1614 pwrt = multibytecodec_encode(self->codec, &self->state, 1615 &pending, self->pendingsize, self->errors, 1616 MBENC_FLUSH | MBENC_RESET); 1617 /* some pending buffer can be truncated when UnicodeEncodeError is 1618 * raised on 'strict' mode. but, 'reset' method is designed to 1619 * reset the pending buffer or states so failed string sequence 1620 * ought to be missed */ 1621 self->pendingsize = 0; 1622 if (pwrt == NULL) 1623 return NULL; 1624 1625 if (PyString_Size(pwrt) > 0) { 1626 PyObject *wr; 1627 wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); 1628 if (wr == NULL) { 1629 Py_DECREF(pwrt); 1630 return NULL; 1631 } 1632 } 1633 Py_DECREF(pwrt); 1634 1635 Py_RETURN_NONE; 1636 1636 } 1637 1637 … … 1639 1639 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1640 1640 { 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1641 MultibyteStreamWriterObject *self; 1642 PyObject *stream, *codec = NULL; 1643 char *errors = NULL; 1644 1645 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", 1646 streamkwarglist, &stream, &errors)) 1647 return NULL; 1648 1649 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); 1650 if (self == NULL) 1651 return NULL; 1652 1653 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1654 if (codec == NULL) 1655 goto errorexit; 1656 if (!MultibyteCodec_Check(codec)) { 1657 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1658 goto errorexit; 1659 } 1660 1661 self->codec = ((MultibyteCodecObject *)codec)->codec; 1662 self->stream = stream; 1663 Py_INCREF(stream); 1664 self->pendingsize = 0; 1665 self->errors = internal_error_callback(errors); 1666 if (self->errors == NULL) 1667 goto errorexit; 1668 if (self->codec->encinit != NULL && 1669 self->codec->encinit(&self->state, self->codec->config) != 0) 1670 goto errorexit; 1671 1672 Py_DECREF(codec); 1673 return (PyObject *)self; 1674 1674 1675 1675 errorexit: 1676 1677 1678 1676 Py_XDECREF(self); 1677 Py_XDECREF(codec); 1678 return NULL; 1679 1679 } 1680 1680 … … 1682 1682 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds) 1683 1683 { 1684 1684 return 0; 1685 1685 } 1686 1686 1687 1687 static int 1688 1688 mbstreamwriter_traverse(MultibyteStreamWriterObject *self, 1689 1690 { 1691 1692 1693 1694 1689 visitproc visit, void *arg) 1690 { 1691 if (ERROR_ISCUSTOM(self->errors)) 1692 Py_VISIT(self->errors); 1693 Py_VISIT(self->stream); 1694 return 0; 1695 1695 } 1696 1696 … … 1698 1698 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) 1699 1699 { 1700 1701 1702 1703 1700 PyObject_GC_UnTrack(self); 1701 ERROR_DECREF(self->errors); 1702 Py_XDECREF(self->stream); 1703 Py_TYPE(self)->tp_free(self); 1704 1704 } 1705 1705 1706 1706 static struct PyMethodDef mbstreamwriter_methods[] = { 1707 {"write",(PyCFunction)mbstreamwriter_write,1708 1709 {"writelines",(PyCFunction)mbstreamwriter_writelines,1710 1711 {"reset",(PyCFunction)mbstreamwriter_reset,1712 1713 {NULL,NULL},1707 {"write", (PyCFunction)mbstreamwriter_write, 1708 METH_O, NULL}, 1709 {"writelines", (PyCFunction)mbstreamwriter_writelines, 1710 METH_O, NULL}, 1711 {"reset", (PyCFunction)mbstreamwriter_reset, 1712 METH_NOARGS, NULL}, 1713 {NULL, NULL}, 1714 1714 }; 1715 1715 1716 1716 static PyMemberDef mbstreamwriter_members[] = { 1717 {"stream",T_OBJECT,1718 1719 1720 1717 {"stream", T_OBJECT, 1718 offsetof(MultibyteStreamWriterObject, stream), 1719 READONLY, NULL}, 1720 {NULL,} 1721 1721 }; 1722 1722 1723 1723 static PyTypeObject MultibyteStreamWriter_Type = { 1724 1725 "MultibyteStreamWriter",/* tp_name */1726 1727 0,/* tp_itemsize */1728 1729 1730 0,/* tp_print */1731 0,/* tp_getattr */1732 0,/* tp_setattr */1733 0,/* tp_compare */1734 0,/* tp_repr */1735 0,/* tp_as_number */1736 0,/* tp_as_sequence */1737 0,/* tp_as_mapping */1738 0,/* tp_hash */1739 0,/* tp_call */1740 0,/* tp_str */1741 PyObject_GenericGetAttr,/* tp_getattro */1742 0,/* tp_setattro */1743 0,/* tp_as_buffer */1744 1745 | Py_TPFLAGS_BASETYPE,/* tp_flags */1746 0,/* tp_doc */1747 (traverseproc)mbstreamwriter_traverse,/* tp_traverse */1748 0,/* tp_clear */1749 0,/* tp_richcompare */1750 0,/* tp_weaklistoffset */1751 0,/* tp_iter */1752 0,/* tp_iterext */1753 mbstreamwriter_methods,/* tp_methods */1754 mbstreamwriter_members,/* tp_members */1755 codecctx_getsets,/* tp_getset */1756 0,/* tp_base */1757 0,/* tp_dict */1758 0,/* tp_descr_get */1759 0,/* tp_descr_set */1760 0,/* tp_dictoffset */1761 mbstreamwriter_init,/* tp_init */1762 0,/* tp_alloc */1763 mbstreamwriter_new,/* tp_new */1724 PyVarObject_HEAD_INIT(NULL, 0) 1725 "MultibyteStreamWriter", /* tp_name */ 1726 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ 1727 0, /* tp_itemsize */ 1728 /* methods */ 1729 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ 1730 0, /* tp_print */ 1731 0, /* tp_getattr */ 1732 0, /* tp_setattr */ 1733 0, /* tp_compare */ 1734 0, /* tp_repr */ 1735 0, /* tp_as_number */ 1736 0, /* tp_as_sequence */ 1737 0, /* tp_as_mapping */ 1738 0, /* tp_hash */ 1739 0, /* tp_call */ 1740 0, /* tp_str */ 1741 PyObject_GenericGetAttr, /* tp_getattro */ 1742 0, /* tp_setattro */ 1743 0, /* tp_as_buffer */ 1744 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1745 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1746 0, /* tp_doc */ 1747 (traverseproc)mbstreamwriter_traverse, /* tp_traverse */ 1748 0, /* tp_clear */ 1749 0, /* tp_richcompare */ 1750 0, /* tp_weaklistoffset */ 1751 0, /* tp_iter */ 1752 0, /* tp_iterext */ 1753 mbstreamwriter_methods, /* tp_methods */ 1754 mbstreamwriter_members, /* tp_members */ 1755 codecctx_getsets, /* tp_getset */ 1756 0, /* tp_base */ 1757 0, /* tp_dict */ 1758 0, /* tp_descr_get */ 1759 0, /* tp_descr_set */ 1760 0, /* tp_dictoffset */ 1761 mbstreamwriter_init, /* tp_init */ 1762 0, /* tp_alloc */ 1763 mbstreamwriter_new, /* tp_new */ 1764 1764 }; 1765 1765 … … 1772 1772 __create_codec(PyObject *ignore, PyObject *arg) 1773 1773 { 1774 1775 1776 1777 if (!PyCObject_Check(arg)) {1778 1779 1780 1781 1782 codec = PyCObject_AsVoidPtr(arg);1783 1784 1785 1786 1787 1788 1789 1790 1791 1774 MultibyteCodecObject *self; 1775 MultibyteCodec *codec; 1776 1777 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) { 1778 PyErr_SetString(PyExc_ValueError, "argument type invalid"); 1779 return NULL; 1780 } 1781 1782 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME); 1783 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) 1784 return NULL; 1785 1786 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); 1787 if (self == NULL) 1788 return NULL; 1789 self->codec = codec; 1790 1791 return (PyObject *)self; 1792 1792 } 1793 1793 1794 1794 static struct PyMethodDef __methods[] = { 1795 1796 1795 {"__create_codec", (PyCFunction)__create_codec, METH_O}, 1796 {NULL, NULL}, 1797 1797 }; 1798 1798 … … 1800 1800 init_multibytecodec(void) 1801 1801 { 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 } 1802 int i; 1803 PyObject *m; 1804 PyTypeObject *typelist[] = { 1805 &MultibyteIncrementalEncoder_Type, 1806 &MultibyteIncrementalDecoder_Type, 1807 &MultibyteStreamReader_Type, 1808 &MultibyteStreamWriter_Type, 1809 NULL 1810 }; 1811 1812 if (PyType_Ready(&MultibyteCodec_Type) < 0) 1813 return; 1814 1815 m = Py_InitModule("_multibytecodec", __methods); 1816 if (m == NULL) 1817 return; 1818 1819 for (i = 0; typelist[i] != NULL; i++) { 1820 if (PyType_Ready(typelist[i]) < 0) 1821 return; 1822 Py_INCREF(typelist[i]); 1823 PyModule_AddObject(m, typelist[i]->tp_name, 1824 (PyObject *)typelist[i]); 1825 } 1826 1827 if (PyErr_Occurred()) 1828 Py_FatalError("can't initialize the _multibytecodec module"); 1829 } -
python/vendor/current/Modules/cjkcodecs/multibytecodec.h
r2 r388 24 24 25 25 typedef union { 26 27 28 29 30 26 void *p; 27 int i; 28 unsigned char c[8]; 29 ucs2_t u2[4]; 30 ucs4_t u4[2]; 31 31 } MultibyteCodec_State; 32 32 33 33 typedef int (*mbcodec_init)(const void *config); 34 34 typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state, 35 36 37 38 35 const void *config, 36 const Py_UNICODE **inbuf, Py_ssize_t inleft, 37 unsigned char **outbuf, Py_ssize_t outleft, 38 int flags); 39 39 typedef int (*mbencodeinit_func)(MultibyteCodec_State *state, 40 40 const void *config); 41 41 typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state, 42 43 42 const void *config, 43 unsigned char **outbuf, Py_ssize_t outleft); 44 44 typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state, 45 46 47 45 const void *config, 46 const unsigned char **inbuf, Py_ssize_t inleft, 47 Py_UNICODE **outbuf, Py_ssize_t outleft); 48 48 typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state, 49 49 const void *config); 50 50 typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state, 51 51 const void *config); 52 52 53 53 typedef struct { 54 55 56 57 58 59 60 61 62 54 const char *encoding; 55 const void *config; 56 mbcodec_init codecinit; 57 mbencode_func encode; 58 mbencodeinit_func encinit; 59 mbencodereset_func encreset; 60 mbdecode_func decode; 61 mbdecodeinit_func decinit; 62 mbdecodereset_func decreset; 63 63 } MultibyteCodec; 64 64 65 65 typedef struct { 66 67 66 PyObject_HEAD 67 MultibyteCodec *codec; 68 68 } MultibyteCodecObject; 69 69 70 70 #define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type) 71 71 72 #define _MultibyteStatefulCodec_HEAD 73 PyObject_HEAD\74 MultibyteCodec *codec;\75 MultibyteCodec_State state;\76 72 #define _MultibyteStatefulCodec_HEAD \ 73 PyObject_HEAD \ 74 MultibyteCodec *codec; \ 75 MultibyteCodec_State state; \ 76 PyObject *errors; 77 77 typedef struct { 78 78 _MultibyteStatefulCodec_HEAD 79 79 } MultibyteStatefulCodecContext; 80 80 81 #define MAXENCPENDING 82 #define _MultibyteStatefulEncoder_HEAD 83 _MultibyteStatefulCodec_HEAD\84 Py_UNICODE pending[MAXENCPENDING];\85 81 #define MAXENCPENDING 2 82 #define _MultibyteStatefulEncoder_HEAD \ 83 _MultibyteStatefulCodec_HEAD \ 84 Py_UNICODE pending[MAXENCPENDING]; \ 85 Py_ssize_t pendingsize; 86 86 typedef struct { 87 87 _MultibyteStatefulEncoder_HEAD 88 88 } MultibyteStatefulEncoderContext; 89 89 90 #define MAXDECPENDING 91 #define _MultibyteStatefulDecoder_HEAD 92 _MultibyteStatefulCodec_HEAD\93 unsigned char pending[MAXDECPENDING];\94 90 #define MAXDECPENDING 8 91 #define _MultibyteStatefulDecoder_HEAD \ 92 _MultibyteStatefulCodec_HEAD \ 93 unsigned char pending[MAXDECPENDING]; \ 94 Py_ssize_t pendingsize; 95 95 typedef struct { 96 96 _MultibyteStatefulDecoder_HEAD 97 97 } MultibyteStatefulDecoderContext; 98 98 99 99 typedef struct { 100 100 _MultibyteStatefulEncoder_HEAD 101 101 } MultibyteIncrementalEncoderObject; 102 102 103 103 typedef struct { 104 104 _MultibyteStatefulDecoder_HEAD 105 105 } MultibyteIncrementalDecoderObject; 106 106 107 107 typedef struct { 108 109 108 _MultibyteStatefulDecoder_HEAD 109 PyObject *stream; 110 110 } MultibyteStreamReaderObject; 111 111 112 112 typedef struct { 113 114 113 _MultibyteStatefulEncoder_HEAD 114 PyObject *stream; 115 115 } MultibyteStreamWriterObject; 116 116 117 117 /* positive values for illegal sequences */ 118 #define MBERR_TOOSMALL 119 #define MBERR_TOOFEW 120 #define MBERR_INTERNAL 118 #define MBERR_TOOSMALL (-1) /* insufficient output buffer space */ 119 #define MBERR_TOOFEW (-2) /* incomplete input buffer */ 120 #define MBERR_INTERNAL (-3) /* internal runtime error */ 121 121 122 #define ERROR_STRICT 123 #define ERROR_IGNORE 124 #define ERROR_REPLACE 125 #define ERROR_ISCUSTOM(p) 126 #define ERROR_DECREF(p) do { 127 if (p != NULL && ERROR_ISCUSTOM(p)) {\128 Py_DECREF(p);\129 }\122 #define ERROR_STRICT (PyObject *)(1) 123 #define ERROR_IGNORE (PyObject *)(2) 124 #define ERROR_REPLACE (PyObject *)(3) 125 #define ERROR_ISCUSTOM(p) ((p) < ERROR_STRICT || ERROR_REPLACE < (p)) 126 #define ERROR_DECREF(p) do { \ 127 if (p != NULL && ERROR_ISCUSTOM(p)) { \ 128 Py_DECREF(p); \ 129 } \ 130 130 } while (0); 131 131 132 #define MBENC_FLUSH 0x0001 /* encode all characters encodable */ 133 #define MBENC_MAX MBENC_FLUSH 132 #define MBENC_FLUSH 0x0001 /* encode all characters encodable */ 133 #define MBENC_MAX MBENC_FLUSH 134 135 #define PyMultibyteCodec_CAPSULE_NAME "multibytecodec.__map_*" 136 134 137 135 138 #ifdef __cplusplus
Note:
See TracChangeset
for help on using the changeset viewer.