Changeset 391 for python/trunk/Modules/cjkcodecs/cjkcodecs.h
- Timestamp: Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location: python/trunk
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Modules/cjkcodecs/cjkcodecs.h
r10 r391 14 14 15 15 /* a unicode "undefined" codepoint */ 16 #define UNIINV 16 #define UNIINV 0xFFFE 17 17 18 18 /* internal-use DBCS codepoints which aren't used by any charsets */ 19 #define NOCHAR 20 #define MULTIC 21 #define DBCINV 19 #define NOCHAR 0xFFFF 20 #define MULTIC 0xFFFE 21 #define DBCINV 0xFFFD 22 22 23 23 /* shorter macros to save source size of mapping tables */ … … 28 28 29 29 struct dbcs_index { 30 31 30 const ucs2_t *map; 31 unsigned char bottom, top; 32 32 }; 33 33 typedef struct dbcs_index decode_map; 34 34 35 35 struct widedbcs_index { 36 37 36 const ucs4_t *map; 37 unsigned char bottom, top; 38 38 }; 39 39 typedef struct widedbcs_index widedecode_map; 40 40 41 41 struct unim_index { 42 43 42 const DBCHAR *map; 43 unsigned char bottom, top; 44 44 }; 45 45 typedef struct unim_index encode_map; 46 46 47 47 struct unim_index_bytebased { 48 49 48 const unsigned char *map; 49 unsigned char bottom, top; 50 50 }; 51 51 52 52 struct dbcs_map { 53 54 55 53 const char *charset; 54 const struct unim_index *encmap; 55 const struct dbcs_index *decmap; 56 56 }; 57 57 58 58 struct pair_encodemap { 59 60 59 ucs4_t uniseq; 60 DBCHAR code; 61 61 }; 62 62 … … 64 64 static const struct dbcs_map *mapping_list; 65 65 66 #define CODEC_INIT(encoding) 67 68 69 #define ENCODER_INIT(encoding) 70 static int encoding##_encode_init(\71 72 #define ENCODER(encoding) 73 static Py_ssize_t encoding##_encode(\74 MultibyteCodec_State *state, const void *config,\75 const Py_UNICODE **inbuf, Py_ssize_t inleft,\76 77 #define ENCODER_RESET(encoding) 78 static Py_ssize_t encoding##_encode_reset(\79 MultibyteCodec_State *state, const void *config,\80 81 82 #define DECODER_INIT(encoding) 83 static int encoding##_decode_init(\84 85 #define DECODER(encoding) 86 static Py_ssize_t encoding##_decode(\87 MultibyteCodec_State *state, const void *config,\88 const unsigned char **inbuf, Py_ssize_t inleft,\89 90 #define DECODER_RESET(encoding) 91 static Py_ssize_t encoding##_decode_reset(\92 66 
#define CODEC_INIT(encoding) \ 67 static int encoding##_codec_init(const void *config) 68 69 #define ENCODER_INIT(encoding) \ 70 static int encoding##_encode_init( \ 71 MultibyteCodec_State *state, const void *config) 72 #define ENCODER(encoding) \ 73 static Py_ssize_t encoding##_encode( \ 74 MultibyteCodec_State *state, const void *config, \ 75 const Py_UNICODE **inbuf, Py_ssize_t inleft, \ 76 unsigned char **outbuf, Py_ssize_t outleft, int flags) 77 #define ENCODER_RESET(encoding) \ 78 static Py_ssize_t encoding##_encode_reset( \ 79 MultibyteCodec_State *state, const void *config, \ 80 unsigned char **outbuf, Py_ssize_t outleft) 81 82 #define DECODER_INIT(encoding) \ 83 static int encoding##_decode_init( \ 84 MultibyteCodec_State *state, const void *config) 85 #define DECODER(encoding) \ 86 static Py_ssize_t encoding##_decode( \ 87 MultibyteCodec_State *state, const void *config, \ 88 const unsigned char **inbuf, Py_ssize_t inleft, \ 89 Py_UNICODE **outbuf, Py_ssize_t outleft) 90 #define DECODER_RESET(encoding) \ 91 static Py_ssize_t encoding##_decode_reset( \ 92 MultibyteCodec_State *state, const void *config) 93 93 94 94 #if Py_UNICODE_SIZE == 4 95 #define UCS4INVALID(code) 96 if ((code) > 0xFFFF)\97 95 #define UCS4INVALID(code) \ 96 if ((code) > 0xFFFF) \ 97 return 1; 98 98 #else 99 #define UCS4INVALID(code) 100 101 #endif 102 103 #define NEXT_IN(i) 104 (*inbuf) += (i);\105 106 #define NEXT_OUT(o) 107 (*outbuf) += (o);\108 109 #define NEXT(i, o) 110 111 112 #define REQUIRE_INBUF(n) 113 if (inleft < (n))\114 115 #define REQUIRE_OUTBUF(n) 116 if (outleft < (n))\117 99 #define UCS4INVALID(code) \ 100 if (0) ; 101 #endif 102 103 #define NEXT_IN(i) \ 104 (*inbuf) += (i); \ 105 (inleft) -= (i); 106 #define NEXT_OUT(o) \ 107 (*outbuf) += (o); \ 108 (outleft) -= (o); 109 #define NEXT(i, o) \ 110 NEXT_IN(i) NEXT_OUT(o) 111 112 #define REQUIRE_INBUF(n) \ 113 if (inleft < (n)) \ 114 return MBERR_TOOFEW; 115 #define REQUIRE_OUTBUF(n) \ 116 if (outleft < (n)) \ 117 return 
MBERR_TOOSMALL; 118 118 119 119 #define IN1 ((*inbuf)[0]) … … 127 127 #define OUT4(c) ((*outbuf)[3]) = (c); 128 128 129 #define WRITE1(c1) 130 REQUIRE_OUTBUF(1)\131 132 #define WRITE2(c1, c2) 133 REQUIRE_OUTBUF(2)\134 (*outbuf)[0] = (c1);\135 136 #define WRITE3(c1, c2, c3) 137 REQUIRE_OUTBUF(3)\138 (*outbuf)[0] = (c1);\139 (*outbuf)[1] = (c2);\140 141 #define WRITE4(c1, c2, c3, c4) 142 REQUIRE_OUTBUF(4)\143 (*outbuf)[0] = (c1);\144 (*outbuf)[1] = (c2);\145 (*outbuf)[2] = (c3);\146 129 #define WRITE1(c1) \ 130 REQUIRE_OUTBUF(1) \ 131 (*outbuf)[0] = (c1); 132 #define WRITE2(c1, c2) \ 133 REQUIRE_OUTBUF(2) \ 134 (*outbuf)[0] = (c1); \ 135 (*outbuf)[1] = (c2); 136 #define WRITE3(c1, c2, c3) \ 137 REQUIRE_OUTBUF(3) \ 138 (*outbuf)[0] = (c1); \ 139 (*outbuf)[1] = (c2); \ 140 (*outbuf)[2] = (c3); 141 #define WRITE4(c1, c2, c3, c4) \ 142 REQUIRE_OUTBUF(4) \ 143 (*outbuf)[0] = (c1); \ 144 (*outbuf)[1] = (c2); \ 145 (*outbuf)[2] = (c3); \ 146 (*outbuf)[3] = (c4); 147 147 148 148 #if Py_UNICODE_SIZE == 2 149 # define WRITEUCS4(c) 150 REQUIRE_OUTBUF(2)\151 (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10);\152 (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff);\153 149 # define WRITEUCS4(c) \ 150 REQUIRE_OUTBUF(2) \ 151 (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ 152 (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ 153 NEXT_OUT(2) 154 154 #else 155 # define WRITEUCS4(c) 156 REQUIRE_OUTBUF(1)\157 **outbuf = (Py_UNICODE)(c);\158 159 #endif 160 161 #define _TRYMAP_ENC(m, assi, val) 162 ((m)->map != NULL && (val) >= (m)->bottom &&\163 (val)<= (m)->top && ((assi) = (m)->map[(val) -\164 165 #define TRYMAP_ENC_COND(charset, assi, uni) 166 167 #define TRYMAP_ENC(charset, assi, uni) 168 169 170 #define _TRYMAP_DEC(m, assi, val) 171 ((m)->map != NULL && (val) >= (m)->bottom &&\172 (val)<= (m)->top && ((assi) = (m)->map[(val) -\173 174 #define TRYMAP_DEC(charset, assi, c1, c2) 175 176 177 #define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) 178 ((m)->map != NULL && (val) >= 
(m)->bottom &&\179 (val)<= (m)->top &&\180 181 182 183 #define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) 184 185 186 #define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) 187 155 # define WRITEUCS4(c) \ 156 REQUIRE_OUTBUF(1) \ 157 **outbuf = (Py_UNICODE)(c); \ 158 NEXT_OUT(1) 159 #endif 160 161 #define _TRYMAP_ENC(m, assi, val) \ 162 ((m)->map != NULL && (val) >= (m)->bottom && \ 163 (val)<= (m)->top && ((assi) = (m)->map[(val) - \ 164 (m)->bottom]) != NOCHAR) 165 #define TRYMAP_ENC_COND(charset, assi, uni) \ 166 _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff) 167 #define TRYMAP_ENC(charset, assi, uni) \ 168 if TRYMAP_ENC_COND(charset, assi, uni) 169 170 #define _TRYMAP_DEC(m, assi, val) \ 171 ((m)->map != NULL && (val) >= (m)->bottom && \ 172 (val)<= (m)->top && ((assi) = (m)->map[(val) - \ 173 (m)->bottom]) != UNIINV) 174 #define TRYMAP_DEC(charset, assi, c1, c2) \ 175 if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) 176 177 #define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \ 178 ((m)->map != NULL && (val) >= (m)->bottom && \ 179 (val)<= (m)->top && \ 180 ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \ 181 (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \ 182 (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1)) 183 #define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \ 184 if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \ 185 assplane, asshi, asslo, (uni) & 0xff) 186 #define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \ 187 if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2) 188 188 189 189 #if Py_UNICODE_SIZE == 2 190 #define DECODE_SURROGATE(c) 191 if (c >> 10 == 0xd800 >> 10) { /* high surrogate */\192 REQUIRE_INBUF(2)\193 194 195 ((ucs4_t)(IN2) - 0xdc00);\196 }\197 198 #define GET_INSIZE(c) 190 #define DECODE_SURROGATE(c) \ 191 if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \ 192 REQUIRE_INBUF(2) \ 193 if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \ 194 c = 
0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \ 195 ((ucs4_t)(IN2) - 0xdc00); \ 196 } \ 197 } 198 #define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1) 199 199 #else 200 200 #define DECODE_SURROGATE(c) {;} 201 #define GET_INSIZE(c) 201 #define GET_INSIZE(c) 1 202 202 #endif 203 203 … … 206 206 #define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap}, 207 207 #define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap}, 208 #define END_MAPPINGS_LIST 209 {"", NULL, NULL} };\210 static const struct dbcs_map *mapping_list =\211 208 #define END_MAPPINGS_LIST \ 209 {"", NULL, NULL} }; \ 210 static const struct dbcs_map *mapping_list = \ 211 (const struct dbcs_map *)_mapping_list; 212 212 213 213 #define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = { 214 #define _STATEFUL_METHODS(enc) 215 enc##_encode,\216 enc##_encode_init,\217 enc##_encode_reset,\218 enc##_decode,\219 enc##_decode_init,\220 221 #define _STATELESS_METHODS(enc) 222 enc##_encode, NULL, NULL,\223 224 #define CODEC_STATEFUL(enc) { 225 #enc, NULL, NULL,\226 _STATEFUL_METHODS(enc)\214 #define _STATEFUL_METHODS(enc) \ 215 enc##_encode, \ 216 enc##_encode_init, \ 217 enc##_encode_reset, \ 218 enc##_decode, \ 219 enc##_decode_init, \ 220 enc##_decode_reset, 221 #define _STATELESS_METHODS(enc) \ 222 enc##_encode, NULL, NULL, \ 223 enc##_decode, NULL, NULL, 224 #define CODEC_STATEFUL(enc) { \ 225 #enc, NULL, NULL, \ 226 _STATEFUL_METHODS(enc) \ 227 227 }, 228 #define CODEC_STATELESS(enc) { 229 #enc, NULL, NULL,\230 _STATELESS_METHODS(enc)\228 #define CODEC_STATELESS(enc) { \ 229 #enc, NULL, NULL, \ 230 _STATELESS_METHODS(enc) \ 231 231 }, 232 #define CODEC_STATELESS_WINIT(enc) { 233 #enc, NULL,\234 enc##_codec_init,\235 _STATELESS_METHODS(enc)\232 #define CODEC_STATELESS_WINIT(enc) { \ 233 #enc, NULL, \ 234 enc##_codec_init, \ 235 _STATELESS_METHODS(enc) \ 236 236 }, 237 #define END_CODECS_LIST 238 {"", NULL,} };\239 static const MultibyteCodec *codec_list =\240 237 #define END_CODECS_LIST 
\ 238 {"", NULL,} }; \ 239 static const MultibyteCodec *codec_list = \ 240 (const MultibyteCodec *)_codec_list; 241 241 242 242 static PyObject * 243 243 getmultibytecodec(void) 244 244 { 245 246 247 248 249 250 251 252 253 254 245 static PyObject *cofunc = NULL; 246 247 if (cofunc == NULL) { 248 PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); 249 if (mod == NULL) 250 return NULL; 251 cofunc = PyObject_GetAttrString(mod, "__create_codec"); 252 Py_DECREF(mod); 253 } 254 return cofunc; 255 255 } 256 256 … … 258 258 getcodec(PyObject *self, PyObject *encoding) 259 259 { 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 codecobj = PyCObject_FromVoidPtr((void *)codec, NULL);286 287 288 289 290 291 292 260 PyObject *codecobj, *r, *cofunc; 261 const MultibyteCodec *codec; 262 const char *enc; 263 264 if (!PyString_Check(encoding)) { 265 PyErr_SetString(PyExc_TypeError, 266 "encoding name must be a string."); 267 return NULL; 268 } 269 270 cofunc = getmultibytecodec(); 271 if (cofunc == NULL) 272 return NULL; 273 274 enc = PyString_AS_STRING(encoding); 275 for (codec = codec_list; codec->encoding[0]; codec++) 276 if (strcmp(codec->encoding, enc) == 0) 277 break; 278 279 if (codec->encoding[0] == '\0') { 280 PyErr_SetString(PyExc_LookupError, 281 "no such codec is supported."); 282 return NULL; 283 } 284 285 codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL); 286 if (codecobj == NULL) 287 return NULL; 288 289 r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL); 290 Py_DECREF(codecobj); 291 292 return r; 293 293 } 294 294 295 295 static struct PyMethodDef __methods[] = { 296 297 296 {"getcodec", (PyCFunction)getcodec, METH_O, ""}, 297 {NULL, NULL}, 298 298 }; 299 299 … … 301 301 register_maps(PyObject *module) 302 302 { 303 304 305 306 307 308 309 310 PyCObject_FromVoidPtr((void *)h, NULL));311 312 313 314 303 const struct dbcs_map *h; 304 305 for (h = mapping_list; 
h->charset[0] != '\0'; h++) { 306 char mhname[256] = "__map_"; 307 int r; 308 strcpy(mhname + sizeof("__map_") - 1, h->charset); 309 r = PyModule_AddObject(module, mhname, 310 PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL)); 311 if (r == -1) 312 return -1; 313 } 314 return 0; 315 315 } 316 316 … … 318 318 static DBCHAR 319 319 find_pairencmap(ucs2_t body, ucs2_t modifier, 320 320 const struct pair_encodemap *haystack, int haystacksize) 321 321 { 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 322 int pos, min, max; 323 ucs4_t value = body << 16 | modifier; 324 325 min = 0; 326 max = haystacksize; 327 328 for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) 329 if (value < haystack[pos].uniseq) { 330 if (max == pos) break; 331 else max = pos; 332 } 333 else if (value > haystack[pos].uniseq) { 334 if (min == pos) break; 335 else min = pos; 336 } 337 else 338 break; 339 340 if (value == haystack[pos].uniseq) 341 return haystack[pos].code; 342 else 343 return DBCINV; 344 344 } 345 345 #endif … … 347 347 #ifdef USING_IMPORTED_MAPS 348 348 #define IMPORT_MAP(locale, charset, encmap, decmap) \ 349 350 349 importmap("_codecs_" #locale, "__map_" #charset, \ 350 (const void**)encmap, (const void**)decmap) 351 351 352 352 static int 353 353 importmap(const char *modname, const char *symbol, 354 354 const void **encmap, const void **decmap) 355 355 { 356 357 358 359 360 361 362 363 364 365 else if (!PyCObject_Check(o)) {366 367 "map data must be a CObject.");368 369 370 371 372 map = PyCObject_AsVoidPtr(o);373 374 375 376 377 378 379 380 381 356 PyObject *o, *mod; 357 358 mod = PyImport_ImportModule((char *)modname); 359 if (mod == NULL) 360 return -1; 361 362 o = PyObject_GetAttrString(mod, (char*)symbol); 363 if (o == NULL) 364 goto errorexit; 365 else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) { 366 PyErr_SetString(PyExc_ValueError, 367 "map data must be a Capsule."); 368 goto errorexit; 369 
} 370 else { 371 struct dbcs_map *map; 372 map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME); 373 if (encmap != NULL) 374 *encmap = map->encmap; 375 if (decmap != NULL) 376 *decmap = map->decmap; 377 Py_DECREF(o); 378 } 379 380 Py_DECREF(mod); 381 return 0; 382 382 383 383 errorexit: 384 385 384 Py_DECREF(mod); 385 return -1; 386 386 } 387 387 #endif 388 388 389 389 #ifdef __OS2__ 390 #define I_AM_A_MODULE_FOR(loc) 391 PyMODINIT_FUNC\392 init_codecs_##loc(void)\393 {\394 395 if (m != NULL)\396 (void)register_maps(m);\397 390 #define I_AM_A_MODULE_FOR(loc) \ 391 PyMODINIT_FUNC \ 392 init_codecs_##loc(void) \ 393 { \ 394 PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\ 395 if (m != NULL) \ 396 (void)register_maps(m); \ 397 } 398 398 #else 399 #define I_AM_A_MODULE_FOR(loc) 400 void\401 init_codecs_##loc(void)\402 {\403 404 if (m != NULL)\405 (void)register_maps(m);\406 399 #define I_AM_A_MODULE_FOR(loc) \ 400 void \ 401 init_codecs_##loc(void) \ 402 { \ 403 PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\ 404 if (m != NULL) \ 405 (void)register_maps(m); \ 406 } 407 407 #endif 408 408
Note: See TracChangeset for help on using the changeset viewer.